This allows multiple threads to request different specializations without locking usage of all specialized shader programs while a new specialization is being compiled. The specialization constants are bundled in a structure that is passed to the `Shader::bind()` method. The structure is owned by the calling thread and is only used by `Shader::bind()`. Only querying for the specialized shader (a map lookup) locks the shader usage. Variant compilation is now also locked, ensuring that multiple threads trying to compile the same variant will never result in a race condition. Note that this removes the `is_dirty` optimization. It can be added back if this becomes a bottleneck in the future. Otherwise, the performance impact is not noticeable. Pull Request: https://projects.blender.org/blender/blender/pulls/136991
54 lines
1.5 KiB
C++
/* SPDX-FileCopyrightText: 2023 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
 * \ingroup gpu
 *
 * GPU Compute Pipeline
 *
 * Allows dispatching compute shader tasks on the GPU.
 * Every dispatch is sent to the active `GPUContext`.
 */
|
|
|
|
#pragma once
|
|
|
|
#include "BLI_sys_types.h"
|
|
|
|
#include "GPU_shader.hh"
|
|
#include "GPU_storage_buffer.hh"
|
|
|
|
/**
|
|
* Dispatch a compute shader task.
|
|
* The number of work groups (aka thread groups) is bounded by `GPU_max_work_group_count()` which
|
|
* might be different in each of the 3 dimensions.
|
|
*/
|
|
void GPU_compute_dispatch(
|
|
GPUShader *shader,
|
|
uint groups_x_len,
|
|
uint groups_y_len,
|
|
uint groups_z_len,
|
|
const blender::gpu::shader::SpecializationConstants *constants_state = nullptr);
|
|
|
|
/**
|
|
* Dispatch a compute shader task. The size of the dispatch is sourced from a \a indirect_buf
|
|
* which must follow this layout:
|
|
* \code{.c}
|
|
* struct DispatchIndirectCommand {
|
|
* uint groups_x_len;
|
|
* uint groups_y_len;
|
|
* uint groups_z_len;
|
|
* };
|
|
* \encode
|
|
*
|
|
* \note The writes to the \a indirect_buf do not need to be synchronized as a memory barrier is
|
|
* emitted internally.
|
|
*
|
|
* The number of work groups (aka thread groups) is bounded by `GPU_max_work_group_count()` which
|
|
* might be different in each of the 3 dimensions.
|
|
*/
|
|
void GPU_compute_dispatch_indirect(
|
|
GPUShader *shader,
|
|
GPUStorageBuf *indirect_buf,
|
|
const blender::gpu::shader::SpecializationConstants *constants_state = nullptr);
|