DRW: Reduce complexity of Draw primitive expansion

This fixes an issue with `finalize_commands`, simplifies the
packing inside the `Draw` command struct, and removes the vertex
length limitation, replacing it with an instance count limit,
which is less likely to be reached.
This commit is contained in:
Clément Foucault
2024-08-07 09:09:47 +02:00
parent 12f3e23326
commit 91de9acc61
2 changed files with 23 additions and 64 deletions

View File

@@ -172,16 +172,22 @@ void Draw::execute(RecordingState &state) const
if (is_primitive_expansion()) {
/* Expanded drawcall. */
IndexRange vert_range = GPU_batch_draw_expanded_parameter_get(
batch, GPUPrimType(expand_prim_type), vertex_len, vertex_first);
IndexRange expanded_range = {vert_range.start() * expand_prim_len,
vert_range.size() * expand_prim_len};
GPU_batch_bind_as_resources(batch, state.shader);
gpu::Batch *gpu_batch = procedural_batch_get(GPUPrimType(expand.prim_type));
gpu::Batch *gpu_batch = procedural_batch_get(GPUPrimType(expand_prim_type));
GPU_batch_set_shader(gpu_batch, state.shader);
GPU_batch_draw_advanced(gpu_batch, vertex_first, expand.vertex_len, 0, instance_len_get());
GPU_batch_draw_advanced(
gpu_batch, expanded_range.start(), expanded_range.size(), 0, instance_len);
}
else {
/* Regular drawcall. */
GPU_batch_set_shader(batch, state.shader);
GPU_batch_draw_advanced(batch, vertex_first, vertex_len, 0, instance_len_get());
GPU_batch_draw_advanced(batch, vertex_first, vertex_len, 0, instance_len);
}
}
@@ -529,9 +535,8 @@ std::string SpecializeConstant::serialize() const
std::string Draw::serialize() const
{
std::string inst_len = std::to_string(instance_len_get());
std::string vert_len = (vertex_len_get() == uint(-1)) ? "from_batch" :
std::to_string(vertex_len);
std::string inst_len = std::to_string(instance_len);
std::string vert_len = (vertex_len == uint(-1)) ? "from_batch" : std::to_string(vertex_len);
std::string vert_first = (vertex_first == uint(-1)) ? "from_batch" :
std::to_string(vertex_first);
return std::string(".draw(inst_len=") + inst_len + ", vert_len=" + vert_len +
@@ -704,22 +709,8 @@ void DrawCommandBuf::finalize_commands(Vector<Header, 0> &headers,
* instance to set the correct resource_id. Workaround is a storage_buf + gl_InstanceID. */
BLI_assert(batch_inst_len == 1);
uint32_t vert_len = cmd.vertex_len_get();
if (vert_len == uint(-1)) {
vert_len = batch_vert_len;
}
if (cmd.is_primitive_expansion()) {
IndexRange vert_range = GPU_batch_draw_expanded_parameter_get(
cmd.batch, GPUPrimType(cmd.expand.prim_type), vert_len, cmd.vertex_first);
IndexRange expanded_range = {vert_range.start() * cmd.expand.prim_len,
vert_range.size() * cmd.expand.prim_len};
BLI_assert(expanded_range.size() < (1 << 25));
cmd.expand.vertex_len = expanded_range.size();
cmd.vertex_first = expanded_range.start();
}
else {
cmd.vertex_len = vert_len;
if (cmd.vertex_len == uint(-1)) {
cmd.vertex_len = batch_vert_len;
}
#ifdef WITH_METAL_BACKEND

View File

@@ -350,19 +350,11 @@ struct SpecializeConstant {
struct Draw {
gpu::Batch *batch;
/* Negative instance count denote expanded draw. */
int32_t instance_len;
uint16_t instance_len;
uint8_t expand_prim_type; /* #GPUPrimType */
uint8_t expand_prim_len;
uint32_t vertex_first;
union {
/* Ugly packing to support expanded draws without inflating the struct.
* Makes vertex range restricted to smaller range for expanded draw. */
struct {
uint32_t prim_type : 4;
uint32_t prim_len : 3;
uint32_t vertex_len : 25;
} expand;
uint32_t vertex_len;
};
uint32_t vertex_len;
ResourceHandle handle;
#ifdef WITH_METAL_BACKEND
/* Shader is required for extracting SSBO vertex fetch expansion parameters during draw command
@@ -388,41 +380,17 @@ struct Draw {
#ifdef WITH_METAL_BACKEND
this->shader = shader;
#endif
BLI_assert((instance_len > 0) && (instance_len < ~uint32_t(0)));
if (expanded_prim_type != GPU_PRIM_NONE) {
this->instance_len = -int32_t(instance_len);
BLI_assert(expanded_prim_type < (1 << 4));
BLI_assert(expanded_prim_len < (1 << 3));
BLI_assert(vertex_len == uint(-1) || vertex_len < (1 << 25));
BLI_assert(vertex_len != 0);
this->expand.prim_type = expanded_prim_type;
this->expand.prim_len = expanded_prim_len;
/* Cannot store auto vertex len value, store it as 0 as this is an invalid input here. */
this->expand.vertex_len = (vertex_len == uint(-1)) ? 0 : vertex_len;
}
else {
this->instance_len = instance_len;
this->vertex_len = vertex_len;
}
BLI_assert(instance_len < SHRT_MAX);
this->instance_len = uint16_t(instance_len);
this->vertex_len = vertex_len;
this->vertex_first = vertex_first;
this->expand_prim_type = expanded_prim_type;
this->expand_prim_len = expanded_prim_len;
}
bool is_primitive_expansion() const
{
return instance_len < 0;
}
uint32_t instance_len_get() const
{
return is_primitive_expansion() ? -instance_len : instance_len;
}
uint32_t vertex_len_get() const
{
if (is_primitive_expansion()) {
return (expand.vertex_len == 0) ? uint(-1) : vertex_len;
}
return vertex_len;
return expand_prim_type != GPU_PRIM_NONE;
}
void execute(RecordingState &state) const;