Draw: Avoid temporary copy for mesh triangulation index buffer
The mesh triangulation data is stored in CPU memory with the same format as the triangles GPU index buffer. Because of that we can skip creating a temporary copy owned by the GPU API. One way to do that is to just upload the data directly and avoid keeping a reference to it. However, we can only upload GPU data from the main thread with OpenGL, so instead reference the data and keep track of whether to free it. When drawing a mesh with a single material and 1.8 million faces, this change gives a 12-15% improvement in framerate, from about 32 to 37 FPS. Part of #116901. Pull Request: https://projects.blender.org/blender/blender/pulls/122175
This commit is contained in:
@@ -23,7 +23,14 @@ static void extract_tris_mesh(const MeshRenderData &mr,
|
||||
const Span<int3> corner_tris = mr.mesh->corner_tris();
|
||||
if (!face_sorted.face_tri_offsets) {
|
||||
/* There are no hidden faces and no reordering is necessary to group triangles with the same
|
||||
* material. The corner indices from #Mesh::corner_tris() can be copied directly to the GPU. */
|
||||
* material. The corner indices from #Mesh::corner_tris() can be copied directly to the GPU
|
||||
* without the usual CPU-side copy owned by the index buffer. Crucially, this assumes that the
|
||||
* data is uploaded to the GPU *before* the dependency graph's evaluated state is cleared (and
|
||||
* with it, the evaluated mesh's triangulation data).
|
||||
*
|
||||
* Eventually these local copies should be completely removed, and code should write directly
|
||||
* to GPU memory, but even then it could be helpful to know that the data already exists
|
||||
* contiguously, owned elsewhere by Blender. */
|
||||
BLI_assert(face_sorted.visible_tris_num == corner_tris.size());
|
||||
GPU_indexbuf_build_in_place_from_memory(&ibo,
|
||||
GPU_PRIM_TRIS,
|
||||
|
||||
@@ -53,6 +53,8 @@ class IndexBuf {
|
||||
bool is_subrange_ = false;
|
||||
/** True if buffer only contains restart indices. */
|
||||
bool is_empty_ = false;
|
||||
/** #data_ is read-only, not owned by an index buffer. */
|
||||
bool reference_data_ = false;
|
||||
|
||||
union {
|
||||
/** Mapped buffer data. non-NULL indicates not yet sent to VRAM. */
|
||||
@@ -70,7 +72,8 @@ class IndexBuf {
|
||||
uint min_index,
|
||||
uint max_index,
|
||||
GPUPrimType prim_type,
|
||||
bool uses_restart_indices);
|
||||
bool uses_restart_indices,
|
||||
bool reference_data);
|
||||
void init_subrange(IndexBuf *elem_src, uint start, uint length);
|
||||
void init_build_on_device(uint index_len);
|
||||
|
||||
|
||||
@@ -315,13 +315,15 @@ void IndexBuf::init(uint indices_len,
|
||||
uint min_index,
|
||||
uint max_index,
|
||||
GPUPrimType prim_type,
|
||||
bool uses_restart_indices)
|
||||
bool uses_restart_indices,
|
||||
bool reference_data)
|
||||
{
|
||||
is_init_ = true;
|
||||
data_ = indices;
|
||||
index_start_ = 0;
|
||||
index_len_ = indices_len;
|
||||
is_empty_ = min_index > max_index;
|
||||
reference_data_ = reference_data;
|
||||
|
||||
/* Patch index buffer to remove restart indices from
|
||||
* non-restart-compatible primitive types. Restart indices
|
||||
@@ -492,7 +494,8 @@ void GPU_indexbuf_build_in_place(GPUIndexBufBuilder *builder, IndexBuf *elem)
|
||||
builder->index_min,
|
||||
builder->index_max,
|
||||
builder->prim_type,
|
||||
builder->uses_restart_indices);
|
||||
builder->uses_restart_indices,
|
||||
false);
|
||||
builder->data = nullptr;
|
||||
}
|
||||
|
||||
@@ -510,7 +513,8 @@ void GPU_indexbuf_build_in_place_ex(GPUIndexBufBuilder *builder,
|
||||
index_min,
|
||||
index_max,
|
||||
builder->prim_type,
|
||||
uses_restart_indices);
|
||||
uses_restart_indices,
|
||||
false);
|
||||
builder->data = nullptr;
|
||||
}
|
||||
|
||||
@@ -522,15 +526,16 @@ void GPU_indexbuf_build_in_place_from_memory(IndexBuf *ibo,
|
||||
const int32_t index_max,
|
||||
const bool uses_restart_indices)
|
||||
{
|
||||
/* If restart indices are used, they need to be stripped on Metal which would require a copy. */
|
||||
BLI_assert(!uses_restart_indices);
|
||||
const uint32_t indices_num = data_len * indices_per_primitive(prim_type);
|
||||
/* TODO: The need for this copy is meant to be temporary. The data should be uploaded directly to
|
||||
* the GPU here rather than copied to an array owned by the IBO first. */
|
||||
uint32_t *copy = static_cast<uint32_t *>(
|
||||
MEM_malloc_arrayN(indices_num, sizeof(uint32_t), __func__));
|
||||
threading::memory_bandwidth_bound_task(sizeof(uint32_t) * indices_num * 2, [&]() {
|
||||
array_utils::copy(Span(data, indices_num), MutableSpan(copy, indices_num));
|
||||
});
|
||||
ibo->init(indices_num, copy, index_min, index_max, prim_type, uses_restart_indices);
|
||||
ibo->init(indices_num,
|
||||
const_cast<uint32_t *>(data),
|
||||
index_min,
|
||||
index_max,
|
||||
prim_type,
|
||||
uses_restart_indices,
|
||||
true);
|
||||
}
|
||||
|
||||
void GPU_indexbuf_create_subrange_in_place(IndexBuf *elem,
|
||||
|
||||
@@ -139,7 +139,10 @@ void MTLIndexBuf::upload_data()
|
||||
}
|
||||
|
||||
/* No need to keep copy of data_ in system memory. */
|
||||
if (data_) {
|
||||
if (reference_data_) {
|
||||
data_ = nullptr;
|
||||
}
|
||||
else {
|
||||
MEM_SAFE_FREE(data_);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,7 +36,12 @@ void GLIndexBuf::bind()
|
||||
/* Sends data to GPU. */
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, size, data_, GL_STATIC_DRAW);
|
||||
/* No need to keep copy of data in system memory. */
|
||||
MEM_SAFE_FREE(data_);
|
||||
if (reference_data_) {
|
||||
data_ = nullptr;
|
||||
}
|
||||
else {
|
||||
MEM_SAFE_FREE(data_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -33,7 +33,12 @@ void VKIndexBuffer::ensure_updated()
|
||||
VKStagingBuffer staging_buffer(buffer_, VKStagingBuffer::Direction::HostToDevice);
|
||||
staging_buffer.host_buffer_get().update(data_);
|
||||
staging_buffer.copy_to_device(context);
|
||||
MEM_SAFE_FREE(data_);
|
||||
if (reference_data_) {
|
||||
data_ = nullptr;
|
||||
}
|
||||
else {
|
||||
MEM_SAFE_FREE(data_);
|
||||
}
|
||||
}
|
||||
|
||||
void VKIndexBuffer::upload_data()
|
||||
|
||||
Reference in New Issue
Block a user