test/source/blender/draw/intern/draw_command.cc
Clément Foucault 20d09435ab DRW: Add split visibility and command generation function
This adds the possibility for user code to
group visibility and command generation compute
dispatches together, to reduce the overhead of
PSO and pipeline switches.

This PR also makes passes that have been submitted
(i.e. that have generated commands) read-only.

This allows redundant command generation to be
removed automatically when submitting the same
pass with the same view multiple times, and
redundant visibility computation to be removed
when the same view is used in multiple passes.
This automation applies to the common usage of
the API (using `submit`).

Custom usage (using `submit_only`) needs to abide by
the rules of the API and update visibility and commands
when required.

Pull Request: https://projects.blender.org/blender/blender/pulls/129170
2024-10-21 13:32:54 +02:00
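
A minimal sketch of the usage pattern described above (`manager`,
`shadow_pass`, `other_pass` and `light_view` are hypothetical names;
the exact Manager/View API calls are assumptions based on this
description, not the confirmed signatures):

  /* Common usage: `submit` elides redundant work automatically. */
  manager.submit(shadow_pass, light_view); /* Visibility + command generation run. */
  manager.submit(shadow_pass, light_view); /* Same pass & view: commands are reused. */
  manager.submit(other_pass, light_view);  /* Same view: visibility result is reused. */
  /* Custom usage: `submit_only` skips this bookkeeping; the caller must
   * update visibility and commands itself when they become stale. */
  manager.submit_only(shadow_pass, light_view);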


/* SPDX-FileCopyrightText: 2022 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup draw
*/
#include "GPU_batch.hh"
#include "GPU_capabilities.hh"
#include "GPU_compute.hh"
#include "GPU_debug.hh"
#include "draw_command.hh"
#include "draw_pass.hh"
#include "draw_shader.hh"
#include "draw_view.hh"
#include <bitset>
#include <sstream>
namespace blender::draw::command {
static gpu::Batch *procedural_batch_get(GPUPrimType primitive)
{
switch (primitive) {
case GPU_PRIM_POINTS:
return drw_cache_procedural_points_get();
case GPU_PRIM_LINES:
return drw_cache_procedural_lines_get();
case GPU_PRIM_TRIS:
return drw_cache_procedural_triangles_get();
case GPU_PRIM_TRI_STRIP:
return drw_cache_procedural_triangle_strips_get();
default:
/* Add new one as needed. */
BLI_assert_unreachable();
return nullptr;
}
}
/* -------------------------------------------------------------------- */
/** \name Commands Execution
* \{ */
void ShaderBind::execute(RecordingState &state) const
{
if (assign_if_different(state.shader, shader)) {
GPU_shader_bind(shader);
}
}
void FramebufferBind::execute() const
{
GPU_framebuffer_bind(*framebuffer);
}
void SubPassTransition::execute() const
{
/* TODO(fclem): Require framebuffer bind to always be part of the pass so that we can track it
* inside RecordingState. */
GPUFrameBuffer *framebuffer = GPU_framebuffer_active_get();
/* Unpack to the real enum type. */
const GPUAttachmentState states[9] = {
GPUAttachmentState(depth_state),
GPUAttachmentState(color_states[0]),
GPUAttachmentState(color_states[1]),
GPUAttachmentState(color_states[2]),
GPUAttachmentState(color_states[3]),
GPUAttachmentState(color_states[4]),
GPUAttachmentState(color_states[5]),
GPUAttachmentState(color_states[6]),
GPUAttachmentState(color_states[7]),
};
GPU_framebuffer_subpass_transition_array(framebuffer, states, ARRAY_SIZE(states));
}
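/* Bind one resource to its shader slot. When `is_reference` is set, the resource was recorded
 * as a pointer and is only dereferenced here at execution time, so it may be (re)created
 * after the command was recorded. */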
void ResourceBind::execute() const
{
if (slot == -1) {
return;
}
switch (type) {
case ResourceBind::Type::Sampler:
GPU_texture_bind_ex(is_reference ? *texture_ref : texture, sampler, slot);
break;
case ResourceBind::Type::BufferSampler:
GPU_vertbuf_bind_as_texture(is_reference ? *vertex_buf_ref : vertex_buf, slot);
break;
case ResourceBind::Type::Image:
GPU_texture_image_bind(is_reference ? *texture_ref : texture, slot);
break;
case ResourceBind::Type::UniformBuf:
GPU_uniformbuf_bind(is_reference ? *uniform_buf_ref : uniform_buf, slot);
break;
case ResourceBind::Type::StorageBuf:
GPU_storagebuf_bind(is_reference ? *storage_buf_ref : storage_buf, slot);
break;
case ResourceBind::Type::UniformAsStorageBuf:
GPU_uniformbuf_bind_as_ssbo(is_reference ? *uniform_buf_ref : uniform_buf, slot);
break;
case ResourceBind::Type::VertexAsStorageBuf:
GPU_vertbuf_bind_as_ssbo(is_reference ? *vertex_buf_ref : vertex_buf, slot);
break;
case ResourceBind::Type::IndexAsStorageBuf:
GPU_indexbuf_bind_as_ssbo(is_reference ? *index_buf_ref : index_buf, slot);
break;
}
}
void PushConstant::execute(RecordingState &state) const
{
if (location == -1) {
return;
}
switch (type) {
case PushConstant::Type::IntValue:
GPU_shader_uniform_int_ex(state.shader, location, comp_len, array_len, int4_value);
break;
case PushConstant::Type::IntReference:
GPU_shader_uniform_int_ex(state.shader, location, comp_len, array_len, int_ref);
break;
case PushConstant::Type::FloatValue:
GPU_shader_uniform_float_ex(state.shader, location, comp_len, array_len, float4_value);
break;
case PushConstant::Type::FloatReference:
GPU_shader_uniform_float_ex(state.shader, location, comp_len, array_len, float_ref);
break;
}
}
void SpecializeConstant::execute() const
{
/* All specialization constants should exist as they are not optimized out like uniforms. */
BLI_assert(location != -1);
switch (type) {
case SpecializeConstant::Type::IntValue:
GPU_shader_constant_int_ex(shader, location, int_value);
break;
case SpecializeConstant::Type::IntReference:
GPU_shader_constant_int_ex(shader, location, *int_ref);
break;
case SpecializeConstant::Type::UintValue:
GPU_shader_constant_uint_ex(shader, location, uint_value);
break;
case SpecializeConstant::Type::UintReference:
GPU_shader_constant_uint_ex(shader, location, *uint_ref);
break;
case SpecializeConstant::Type::FloatValue:
GPU_shader_constant_float_ex(shader, location, float_value);
break;
case SpecializeConstant::Type::FloatReference:
GPU_shader_constant_float_ex(shader, location, *float_ref);
break;
case SpecializeConstant::Type::BoolValue:
GPU_shader_constant_bool_ex(shader, location, bool_value);
break;
case SpecializeConstant::Type::BoolReference:
GPU_shader_constant_bool_ex(shader, location, *bool_ref);
break;
}
}
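/* Issue a single draw-call. With primitive expansion, the original batch is bound as a shader
 * resource and the draw is emitted through a procedural batch of the target primitive type,
 * with the vertex range scaled by `expand_prim_len`. */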
void Draw::execute(RecordingState &state) const
{
state.front_facing_set(handle.has_inverted_handedness());
if (GPU_shader_draw_parameters_support() == false) {
GPU_batch_resource_id_buf_set(batch, state.resource_id_buf);
}
if (is_primitive_expansion()) {
/* Expanded drawcall. */
IndexRange vert_range = GPU_batch_draw_expanded_parameter_get(
batch, GPUPrimType(expand_prim_type), vertex_len, vertex_first);
IndexRange expanded_range = {vert_range.start() * expand_prim_len,
vert_range.size() * expand_prim_len};
if (expanded_range.is_empty()) {
/* Nothing to draw; an empty range can also trigger asserts in GPU_batch_bind_as_resources. */
return;
}
GPU_batch_bind_as_resources(batch, state.shader);
gpu::Batch *gpu_batch = procedural_batch_get(GPUPrimType(expand_prim_type));
GPU_batch_set_shader(gpu_batch, state.shader);
GPU_batch_draw_advanced(
gpu_batch, expanded_range.start(), expanded_range.size(), 0, instance_len);
}
else {
/* Regular drawcall. */
GPU_batch_set_shader(batch, state.shader);
GPU_batch_draw_advanced(batch, vertex_first, vertex_len, 0, instance_len);
}
}
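/* Walk the linked list of draw groups (chained through `DrawGroup::next`). Each group owns two
 * GPU-generated indirect commands at consecutive offsets: the first draws back-facing
 * (inverted handedness) geometry, the second draws front-facing geometry. */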
void DrawMulti::execute(RecordingState &state) const
{
DrawMultiBuf::DrawCommandBuf &indirect_buf = multi_draw_buf->command_buf_;
DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_;
uint group_index = this->group_first;
while (group_index != uint(-1)) {
const DrawGroup &group = groups[group_index];
if (group.vertex_len > 0) {
gpu::Batch *batch = group.desc.gpu_batch;
if (GPUPrimType(group.desc.expand_prim_type) != GPU_PRIM_NONE) {
/* Bind original batch as resource and use a procedural batch to issue the draw-call. */
GPU_batch_bind_as_resources(group.desc.gpu_batch, state.shader);
batch = procedural_batch_get(GPUPrimType(group.desc.expand_prim_type));
}
if (GPU_shader_draw_parameters_support() == false) {
GPU_batch_resource_id_buf_set(batch, state.resource_id_buf);
}
GPU_batch_set_shader(batch, state.shader);
constexpr intptr_t stride = sizeof(DrawCommand);
/* We have 2 indirect commands reserved per draw group. */
intptr_t offset = stride * group_index * 2;
/* Draw negatively scaled geometry first. */
if (group.len - group.front_facing_len > 0) {
state.front_facing_set(true);
GPU_batch_draw_indirect(batch, indirect_buf, offset);
}
if (group.front_facing_len > 0) {
state.front_facing_set(false);
GPU_batch_draw_indirect(batch, indirect_buf, offset + stride);
}
}
group_index = group.next;
}
}
void DrawIndirect::execute(RecordingState &state) const
{
state.front_facing_set(handle.has_inverted_handedness());
GPU_batch_draw_indirect(batch, *indirect_buf, 0);
}
void Dispatch::execute(RecordingState &state) const
{
if (is_reference) {
GPU_compute_dispatch(state.shader, size_ref->x, size_ref->y, size_ref->z);
}
else {
GPU_compute_dispatch(state.shader, size.x, size.y, size.z);
}
}
void DispatchIndirect::execute(RecordingState &state) const
{
GPU_compute_dispatch_indirect(state.shader, *indirect_buf);
}
void Barrier::execute() const
{
GPU_memory_barrier(type);
}
void Clear::execute() const
{
GPUFrameBuffer *fb = GPU_framebuffer_active_get();
GPU_framebuffer_clear(fb, (eGPUFrameBufferBits)clear_channels, color, depth, stencil);
}
void ClearMulti::execute() const
{
GPUFrameBuffer *fb = GPU_framebuffer_active_get();
GPU_framebuffer_multi_clear(fb, (const float(*)[4])colors);
}
void StateSet::execute(RecordingState &recording_state) const
{
/**
* Does not support locked state for the moment and never should.
* Better implement a less hacky selection!
*/
BLI_assert(DST.state_lock == 0);
bool state_changed = assign_if_different(recording_state.pipeline_state, new_state);
bool clip_changed = assign_if_different(recording_state.clip_plane_count, clip_plane_count);
if (!state_changed && !clip_changed) {
return;
}
/* Keep old API working. Keep the state tracking in sync. */
/* TODO(fclem): Move at the end of a pass. */
DST.state = new_state;
GPU_state_set(to_write_mask(new_state),
to_blend(new_state),
to_face_cull_test(new_state),
to_depth_test(new_state),
to_stencil_test(new_state),
to_stencil_op(new_state),
to_provoking_vertex(new_state));
if (new_state & DRW_STATE_SHADOW_OFFSET) {
GPU_shadow_offset(true);
}
else {
GPU_shadow_offset(false);
}
/* TODO: this should be part of shader state. */
GPU_clip_distances(recording_state.clip_plane_count);
if (new_state & DRW_STATE_IN_FRONT_SELECT) {
/* XXX `GPU_depth_range` is not a perfect solution
* since very distant geometries can still be occluded.
* Also the depth test precision of these geometries is impaired.
* However, it solves the selection for the vast majority of cases. */
GPU_depth_range(0.0f, 0.01f);
}
else {
GPU_depth_range(0.0f, 1.0f);
}
if (new_state & DRW_STATE_PROGRAM_POINT_SIZE) {
GPU_program_point_size(true);
}
else {
GPU_program_point_size(false);
}
}
void StencilSet::execute() const
{
GPU_stencil_write_mask_set(write_mask);
GPU_stencil_compare_mask_set(compare_mask);
GPU_stencil_reference_set(reference);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Commands Serialization for debugging
* \{ */
std::string ShaderBind::serialize() const
{
return std::string(".shader_bind(") + GPU_shader_get_name(shader) + ")";
}
std::string FramebufferBind::serialize() const
{
return std::string(".framebuffer_bind(") +
(*framebuffer == nullptr ? "nullptr" : GPU_framebuffer_get_name(*framebuffer)) + ")";
}
std::string SubPassTransition::serialize() const
{
auto to_str = [](GPUAttachmentState state) {
return (state != GPU_ATTACHMENT_IGNORE) ?
((state == GPU_ATTACHMENT_WRITE) ? "write" : "read") :
"ignore";
};
return std::string(".subpass_transition(\n") +
"depth=" + to_str(GPUAttachmentState(depth_state)) + ",\n" +
"color0=" + to_str(GPUAttachmentState(color_states[0])) + ",\n" +
"color1=" + to_str(GPUAttachmentState(color_states[1])) + ",\n" +
"color2=" + to_str(GPUAttachmentState(color_states[2])) + ",\n" +
"color3=" + to_str(GPUAttachmentState(color_states[3])) + ",\n" +
"color4=" + to_str(GPUAttachmentState(color_states[4])) + ",\n" +
"color5=" + to_str(GPUAttachmentState(color_states[5])) + ",\n" +
"color6=" + to_str(GPUAttachmentState(color_states[6])) + ",\n" +
"color7=" + to_str(GPUAttachmentState(color_states[7])) + "\n)";
}
std::string ResourceBind::serialize() const
{
switch (type) {
case Type::Sampler:
return std::string(".bind_texture") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ", sampler=" + sampler.to_string() + ")";
case Type::BufferSampler:
return std::string(".bind_vertbuf_as_texture") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::Image:
return std::string(".bind_image") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::UniformBuf:
return std::string(".bind_uniform_buf") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::StorageBuf:
return std::string(".bind_storage_buf") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::UniformAsStorageBuf:
return std::string(".bind_uniform_as_ssbo") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::VertexAsStorageBuf:
return std::string(".bind_vertbuf_as_ssbo") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
case Type::IndexAsStorageBuf:
return std::string(".bind_indexbuf_as_ssbo") + (is_reference ? "_ref" : "") + "(" +
std::to_string(slot) + ")";
default:
BLI_assert_unreachable();
return "";
}
}
std::string PushConstant::serialize() const
{
std::stringstream ss;
for (int i = 0; i < array_len; i++) {
switch (comp_len) {
case 1:
switch (type) {
case Type::IntValue:
ss << int1_value;
break;
case Type::IntReference:
ss << int_ref[i];
break;
case Type::FloatValue:
ss << float1_value;
break;
case Type::FloatReference:
ss << float_ref[i];
break;
}
break;
case 2:
switch (type) {
case Type::IntValue:
ss << int2_value;
break;
case Type::IntReference:
ss << int2_ref[i];
break;
case Type::FloatValue:
ss << float2_value;
break;
case Type::FloatReference:
ss << float2_ref[i];
break;
}
break;
case 3:
switch (type) {
case Type::IntValue:
ss << int3_value;
break;
case Type::IntReference:
ss << int3_ref[i];
break;
case Type::FloatValue:
ss << float3_value;
break;
case Type::FloatReference:
ss << float3_ref[i];
break;
}
break;
case 4:
switch (type) {
case Type::IntValue:
ss << int4_value;
break;
case Type::IntReference:
ss << int4_ref[i];
break;
case Type::FloatValue:
ss << float4_value;
break;
case Type::FloatReference:
ss << float4_ref[i];
break;
}
break;
case 16:
switch (type) {
case Type::IntValue:
case Type::IntReference:
BLI_assert_unreachable();
break;
case Type::FloatValue:
ss << float4x4(
(&float4_value)[0], (&float4_value)[1], (&float4_value)[2], (&float4_value)[3]);
break;
case Type::FloatReference:
ss << *float4x4_ref;
break;
}
break;
}
if (i < array_len - 1) {
ss << ", ";
}
}
return std::string(".push_constant(") + std::to_string(location) + ", data=" + ss.str() + ")";
}
std::string SpecializeConstant::serialize() const
{
std::stringstream ss;
switch (type) {
case Type::IntValue:
ss << int_value;
break;
case Type::UintValue:
ss << uint_value;
break;
case Type::FloatValue:
ss << float_value;
break;
case Type::BoolValue:
ss << bool_value;
break;
case Type::IntReference:
ss << *int_ref;
break;
case Type::UintReference:
ss << *uint_ref;
break;
case Type::FloatReference:
ss << *float_ref;
break;
case Type::BoolReference:
ss << *bool_ref;
break;
}
return std::string(".specialize_constant(") + std::to_string(location) + ", data=" + ss.str() +
")";
}
std::string Draw::serialize() const
{
std::string inst_len = std::to_string(instance_len);
std::string vert_len = (vertex_len == uint(-1)) ? "from_batch" : std::to_string(vertex_len);
std::string vert_first = (vertex_first == uint(-1)) ? "from_batch" :
std::to_string(vertex_first);
return std::string(".draw(inst_len=") + inst_len + ", vert_len=" + vert_len +
", vert_first=" + vert_first + ", res_id=" + std::to_string(handle.resource_index()) +
")";
}
std::string DrawMulti::serialize(const std::string &line_prefix) const
{
DrawMultiBuf::DrawGroupBuf &groups = multi_draw_buf->group_buf_;
MutableSpan<DrawPrototype> prototypes(multi_draw_buf->prototype_buf_.data(),
multi_draw_buf->prototype_count_);
/* This emulates the GPU sorting but without the unstable draw order. */
std::sort(
prototypes.begin(), prototypes.end(), [](const DrawPrototype &a, const DrawPrototype &b) {
return (a.group_id < b.group_id) ||
(a.group_id == b.group_id && a.resource_handle > b.resource_handle);
});
/* Compute prefix sum to have correct offsets. */
uint prefix_sum = 0u;
for (DrawGroup &group : groups) {
group.start = prefix_sum;
prefix_sum += group.front_facing_counter + group.back_facing_counter;
}
std::stringstream ss;
uint group_len = 0;
uint group_index = this->group_first;
while (group_index != uint(-1)) {
const DrawGroup &grp = groups[group_index];
ss << std::endl << line_prefix << " .group(id=" << group_index << ", len=" << grp.len << ")";
intptr_t offset = grp.start;
if (grp.back_facing_counter > 0) {
for (DrawPrototype &proto : prototypes.slice_safe({offset, grp.back_facing_counter})) {
BLI_assert(proto.group_id == group_index);
ResourceHandle handle(proto.resource_handle);
BLI_assert(handle.has_inverted_handedness());
ss << std::endl
<< line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len)
<< ", resource_id=" << std::to_string(handle.resource_index()) << ", back_face)";
}
offset += grp.back_facing_counter;
}
if (grp.front_facing_counter > 0) {
for (DrawPrototype &proto : prototypes.slice_safe({offset, grp.front_facing_counter})) {
BLI_assert(proto.group_id == group_index);
ResourceHandle handle(proto.resource_handle);
BLI_assert(!handle.has_inverted_handedness());
ss << std::endl
<< line_prefix << " .proto(instance_len=" << std::to_string(proto.instance_len)
<< ", resource_id=" << std::to_string(handle.resource_index()) << ", front_face)";
}
}
group_index = grp.next;
group_len++;
}
ss << std::endl;
return line_prefix + ".draw_multi(" + std::to_string(group_len) + ")" + ss.str();
}
std::string DrawIndirect::serialize() const
{
return std::string(".draw_indirect()");
}
std::string Dispatch::serialize() const
{
int3 sz = is_reference ? *size_ref : size;
return std::string(".dispatch") + (is_reference ? "_ref" : "") + "(" + std::to_string(sz.x) +
", " + std::to_string(sz.y) + ", " + std::to_string(sz.z) + ")";
}
std::string DispatchIndirect::serialize() const
{
return std::string(".dispatch_indirect()");
}
std::string Barrier::serialize() const
{
/* TODO(@fclem): Better serialization... */
return std::string(".barrier(") + std::to_string(type) + ")";
}
std::string Clear::serialize() const
{
std::stringstream ss;
if (eGPUFrameBufferBits(clear_channels) & GPU_COLOR_BIT) {
ss << "color=" << color;
if (eGPUFrameBufferBits(clear_channels) & (GPU_DEPTH_BIT | GPU_STENCIL_BIT)) {
ss << ", ";
}
}
if (eGPUFrameBufferBits(clear_channels) & GPU_DEPTH_BIT) {
ss << "depth=" << depth;
if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) {
ss << ", ";
}
}
if (eGPUFrameBufferBits(clear_channels) & GPU_STENCIL_BIT) {
ss << "stencil=0b" << std::bitset<8>(stencil) << ")";
}
return std::string(".clear(") + ss.str() + ")";
}
std::string ClearMulti::serialize() const
{
std::stringstream ss;
for (float4 color : Span<float4>(colors, colors_len)) {
ss << color << ", ";
}
return std::string(".clear_multi(colors={") + ss.str() + "})";
}
std::string StateSet::serialize() const
{
/* TODO(@fclem): Better serialization... */
return std::string(".state_set(") + std::to_string(new_state) + ")";
}
std::string StencilSet::serialize() const
{
std::stringstream ss;
ss << ".stencil_set(write_mask=0b" << std::bitset<8>(write_mask) << ", reference=0b"
<< std::bitset<8>(reference) << ", compare_mask=0b" << std::bitset<8>(compare_mask) << ")";
return ss.str();
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Commands buffers binding / command / resource ID generation
* \{ */
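/* CPU-side path: recursively walk sub-passes, resolve draw-call parameters from the (now
 * finished) batches, and fill `resource_id_buf` with one resource index per instance so
 * shaders can fetch the matching resource data. */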
void DrawCommandBuf::finalize_commands(Vector<Header, 0> &headers,
Vector<Undetermined, 0> &commands,
SubPassVector &sub_passes,
uint &resource_id_count,
ResourceIdBuf &resource_id_buf)
{
for (const Header &header : headers) {
if (header.type == Type::SubPass) {
/** WARNING: Recursive. */
auto &sub = sub_passes[int64_t(header.index)];
finalize_commands(
sub.headers_, sub.commands_, sub_passes, resource_id_count, resource_id_buf);
}
if (header.type != Type::Draw) {
continue;
}
Draw &cmd = commands[header.index].draw;
int batch_vert_len, batch_vert_first, batch_base_index, batch_inst_len;
/* Now that GPUBatches are guaranteed to be finished, extract their parameters. */
GPU_batch_draw_parameter_get(
cmd.batch, &batch_vert_len, &batch_vert_first, &batch_base_index, &batch_inst_len);
/* Instancing attributes are not supported using the new pipeline since we use the base
* instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */
BLI_assert(batch_inst_len == 1);
if (cmd.vertex_len == uint(-1)) {
cmd.vertex_len = batch_vert_len;
}
if (cmd.handle.raw > 0) {
/* Save correct offset to start of resource_id buffer region for this draw. */
uint instance_first = resource_id_count;
resource_id_count += cmd.instance_len;
/* Ensure the buffer is big enough. */
resource_id_buf.get_or_resize(resource_id_count - 1);
/* Copy the resource id for all instances. */
uint index = cmd.handle.resource_index();
for (int i = instance_first; i < (instance_first + cmd.instance_len); i++) {
resource_id_buf[i] = index;
}
}
}
}
void DrawCommandBuf::generate_commands(Vector<Header, 0> &headers,
Vector<Undetermined, 0> &commands,
SubPassVector &sub_passes)
{
resource_id_count_ = 0;
finalize_commands(headers, commands, sub_passes, resource_id_count_, resource_id_buf_);
resource_id_buf_.push_update();
}
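/* When shader draw parameters are unsupported, the resource-id buffer cannot be indexed via
 * the base-instance draw parameter; it is instead attached to each batch (see
 * `GPU_batch_resource_id_buf_set` in the execute functions above) and consumed as a
 * per-instance vertex attribute. */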
void DrawCommandBuf::bind(RecordingState &state)
{
if (GPU_shader_draw_parameters_support() == false) {
state.resource_id_buf = resource_id_buf_;
}
else {
GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT);
}
}
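/* GPU path: prepare per-group draw parameters on the CPU, then dispatch a command-generation
 * compute shader that culls draw prototypes against the visibility bits and writes the final
 * indirect commands and resource-id buffer on the GPU. */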
void DrawMultiBuf::generate_commands(Vector<Header, 0> & /*headers*/,
Vector<Undetermined, 0> & /*commands*/,
VisibilityBuf &visibility_buf,
int visibility_word_per_draw,
int view_len,
bool use_custom_ids)
{
GPU_debug_group_begin("DrawMultiBuf.bind");
resource_id_count_ = 0u;
for (DrawGroup &group : MutableSpan<DrawGroup>(group_buf_.data(), group_count_)) {
/* Compute prefix sum of all instances of previous groups. */
group.start = resource_id_count_;
resource_id_count_ += group.len;
int batch_vert_len, batch_vert_first, batch_base_index, batch_inst_len;
/* Now that GPUBatches are guaranteed to be finished, extract their parameters. */
GPU_batch_draw_parameter_get(group.desc.gpu_batch,
&batch_vert_len,
&batch_vert_first,
&batch_base_index,
&batch_inst_len);
group.vertex_len = group.desc.vertex_len == 0 ? batch_vert_len : group.desc.vertex_len;
group.vertex_first = group.desc.vertex_first == -1 ? batch_vert_first :
group.desc.vertex_first;
group.base_index = batch_base_index;
/* Instancing attributes are not supported using the new pipeline since we use the base
* instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */
BLI_assert(batch_inst_len == 1);
UNUSED_VARS_NDEBUG(batch_inst_len);
if (group.desc.expand_prim_type != GPU_PRIM_NONE) {
/* Expanded drawcall. */
IndexRange vert_range = GPU_batch_draw_expanded_parameter_get(
group.desc.gpu_batch,
GPUPrimType(group.desc.expand_prim_type),
group.vertex_len,
group.vertex_first);
group.vertex_first = vert_range.start() * group.desc.expand_prim_len;
group.vertex_len = vert_range.size() * group.desc.expand_prim_len;
/* Override base index to -1 as the generated drawcall will not use an index buffer and will
 * do the indirection manually inside the shader. */
group.base_index = -1;
}
/* Reset counters to 0 for the GPU. */
group.total_counter = group.front_facing_counter = group.back_facing_counter = 0;
}
group_buf_.push_update();
prototype_buf_.push_update();
/* Allocate enough for the expansion pass. */
resource_id_buf_.get_or_resize(resource_id_count_ * view_len * (use_custom_ids ? 2 : 1));
/* Two commands per group (inverted and non-inverted scale). */
command_buf_.get_or_resize(group_count_ * 2);
if (prototype_count_ > 0) {
GPUShader *shader = DRW_shader_draw_command_generate_get();
GPU_shader_bind(shader);
GPU_shader_uniform_1i(shader, "prototype_len", prototype_count_);
GPU_shader_uniform_1i(shader, "visibility_word_per_draw", visibility_word_per_draw);
GPU_shader_uniform_1i(shader, "view_len", view_len);
GPU_shader_uniform_1i(shader, "view_shift", log2_ceil_u(view_len));
GPU_shader_uniform_1b(shader, "use_custom_ids", use_custom_ids);
GPU_storagebuf_bind(group_buf_, GPU_shader_get_ssbo_binding(shader, "group_buf"));
GPU_storagebuf_bind(visibility_buf, GPU_shader_get_ssbo_binding(shader, "visibility_buf"));
GPU_storagebuf_bind(prototype_buf_, GPU_shader_get_ssbo_binding(shader, "prototype_buf"));
GPU_storagebuf_bind(command_buf_, GPU_shader_get_ssbo_binding(shader, "command_buf"));
GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT);
GPU_compute_dispatch(shader, divide_ceil_u(prototype_count_, DRW_COMMAND_GROUP_SIZE), 1, 1);
/* TODO(@fclem): Investigate moving the barrier into the bind function. */
if (GPU_shader_draw_parameters_support() == false) {
GPU_memory_barrier(GPU_BARRIER_VERTEX_ATTRIB_ARRAY);
}
else {
GPU_memory_barrier(GPU_BARRIER_SHADER_STORAGE);
}
GPU_storagebuf_sync_as_indirect_buffer(command_buf_);
}
GPU_debug_group_end();
}
void DrawMultiBuf::bind(RecordingState &state)
{
if (GPU_shader_draw_parameters_support() == false) {
state.resource_id_buf = resource_id_buf_;
}
else {
GPU_storagebuf_bind(resource_id_buf_, DRW_RESOURCE_ID_SLOT);
}
}
/** \} */
}  // namespace blender::draw::command