/* SPDX-FileCopyrightText: 2025 Blender Authors
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

/** \file
 * \ingroup gpu
 *
 * Convert material node-trees to GLSL.
 */
#include "MEM_guardedalloc.h"
#include "BLI_map.hh"
#include "BLI_span.hh"
#include "BLI_time.h"
#include "BLI_vector.hh"
#include "GPU_capabilities.hh"
#include "GPU_context.hh"
#include "GPU_pass.hh"
#include "GPU_vertex_format.hh"
#include "gpu_codegen.hh"
#include <mutex>
#include <string>
using namespace blender;
using namespace blender::gpu::shader;

static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info);

/* -------------------------------------------------------------------- */
/** \name GPUPass
 * \{ */
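
/**
 * A reference counted, cached shader compilation unit generated from a material node-tree.
 * Compilation either happens immediately on creation or is deferred through the shader batch
 * API; optimization passes are additionally delayed by `optimization_delay` seconds.
 */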
struct GPUPass {
  static inline std::atomic<uint64_t> compilation_counts = 0;

  GPUCodegenCreateInfo *create_info = nullptr;
  BatchHandle compilation_handle = 0;
  std::atomic<blender::gpu::Shader *> shader = nullptr;
  std::atomic<GPUPassStatus> status = GPU_PASS_QUEUED;
  /* Orphaned GPUPasses get freed by the garbage collector. */
  std::atomic<int> refcount = 1;
  double creation_timestamp = 0.0;
  /* The last time the refcount was greater than 0. */
  double gc_timestamp = 0.0;
  uint64_t compilation_timestamp = 0;
  /** Hint that an optimized variant of this pass should be created.
   * Based on a complexity heuristic from pass code generation. */
  bool should_optimize = false;
  bool is_optimization_pass = false;
  /* Number of seconds after creation required before compiling an optimization pass. */
  static constexpr float optimization_delay = 10.0f;

  GPUPass(GPUCodegenCreateInfo *info,
          bool deferred_compilation,
          bool is_optimization_pass,
          bool should_optimize)
      : create_info(info),
        creation_timestamp(BLI_time_now_seconds()),
        should_optimize(should_optimize),
        is_optimization_pass(is_optimization_pass)
  {
    BLI_assert(!is_optimization_pass || !should_optimize);

    if (is_optimization_pass && deferred_compilation) {
      /* Defer until all non-optimization passes are compiled. */
      return;
    }

    GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);
    if (deferred_compilation) {
      compilation_handle = GPU_shader_batch_create_from_infos(
          Span<GPUShaderCreateInfo *>(&base_info, 1), compilation_priority());
    }
    else {
      shader = GPU_shader_create_from_info(base_info);
      finalize_compilation();
    }
  }

  ~GPUPass()
  {
    if (compilation_handle) {
      GPU_shader_batch_cancel(compilation_handle);
    }
    else {
      BLI_assert(create_info == nullptr || (is_optimization_pass && status == GPU_PASS_QUEUED));
    }
    MEM_delete(create_info);
    GPU_SHADER_FREE_SAFE(shader);
  }

  CompilationPriority compilation_priority()
  {
    return is_optimization_pass ? CompilationPriority::Low : CompilationPriority::Medium;
  }
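
  /* Resolve the result of a pending batch compilation (blocking if it is still running), stamp
   * the pass with a globally ordered compilation timestamp, and free the create-info, which is
   * not needed past this point. */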
  void finalize_compilation()
  {
    BLI_assert_msg(create_info, "GPUPass::finalize_compilation() called more than once.");
    if (compilation_handle) {
      shader = GPU_shader_batch_finalize(compilation_handle).first();
    }
    compilation_timestamp = ++compilation_counts;
    if (!shader && !gpu_pass_validate(create_info)) {
      fprintf(stderr, "blender::gpu::Shader: error: too many samplers in shader.\n");
    }
    status = shader ? GPU_PASS_SUCCESS : GPU_PASS_FAILED;
    MEM_delete(create_info);
    create_info = nullptr;
  }

  void update(double timestamp)
  {
    update_compilation(timestamp);
    update_gc_timestamp(timestamp);
  }
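
  /* Finalize batch compilations that have completed, and kick the deferred compilation of an
   * optimization pass once it has stayed referenced for `optimization_delay` seconds. */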
  void update_compilation(double timestamp)
  {
    if (compilation_handle) {
      if (GPU_shader_batch_is_ready(compilation_handle)) {
        finalize_compilation();
      }
    }
    else if (status == GPU_PASS_QUEUED && refcount > 0 &&
             ((creation_timestamp + optimization_delay) <= timestamp))
    {
      BLI_assert(is_optimization_pass);
      GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);
      compilation_handle = GPU_shader_batch_create_from_infos(
          Span<GPUShaderCreateInfo *>(&base_info, 1), compilation_priority());
    }
  }

  void update_gc_timestamp(double timestamp)
  {
    if (refcount != 0 || gc_timestamp == 0.0) {
      gc_timestamp = timestamp;
    }
  }

  bool should_gc(int gc_collect_rate, double timestamp)
  {
    BLI_assert(gc_timestamp != 0.0);
    return !compilation_handle && status != GPU_PASS_FAILED &&
           (timestamp - gc_timestamp) >= gc_collect_rate;
  }
};

GPUPassStatus GPU_pass_status(GPUPass *pass)
{
  return pass->status;
}

bool GPU_pass_should_optimize(GPUPass *pass)
{
  /* Return the optimization heuristic prepared during initial codegen.
   * NOTE: Only enabled on Metal, since it doesn't seem to yield any performance improvement on
   * other backends. */
  return (GPU_backend_get_type() == GPU_BACKEND_METAL) && pass->should_optimize;
}

blender::gpu::Shader *GPU_pass_shader_get(GPUPass *pass)
{
  return pass->shader;
}

void GPU_pass_acquire(GPUPass *pass)
{
  int previous_refcount = pass->refcount++;
  UNUSED_VARS_NDEBUG(previous_refcount);
  BLI_assert(previous_refcount > 0);
}

void GPU_pass_release(GPUPass *pass)
{
  int previous_refcount = pass->refcount--;
  UNUSED_VARS_NDEBUG(previous_refcount);
  BLI_assert(previous_refcount > 0);
}

uint64_t GPU_pass_global_compilation_count()
{
  return GPUPass::compilation_counts;
}

uint64_t GPU_pass_compilation_timestamp(GPUPass *pass)
{
  return pass->compilation_timestamp;
}
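
/* Typical usage from an engine's point of view (illustrative sketch, not code from an actual
 * engine; `callback` and `thunk` are hypothetical placeholders):
 *
 *   GPUPass *pass = GPU_generate_pass(
 *       material, graph, "my_engine_surface", engine, true, callback, thunk, false);
 *   if (GPU_pass_status(pass) == GPU_PASS_SUCCESS) {
 *     blender::gpu::Shader *shader = GPU_pass_shader_get(pass);
 *     // Bind and draw with the shader ...
 *   }
 *   GPU_pass_release(pass);
 */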
/** \} */

/* -------------------------------------------------------------------- */
/** \name GPUPass Cache
 *
 * Internal shader cache: This prevents shader recompilation / stalls when using undo/redo, and
 * also allows GPUPass reuse when the shader code is the same for two different materials.
 * Unused GPUPasses are freed by garbage collection.
 * \{ */

class GPUPassCache {
  /** Number of seconds with 0 users required before garbage collecting a pass. */
  static constexpr float gc_collect_rate_ = 60.0f;
  static constexpr float optimization_gc_collect_rate_ = 1.0f;

  Map<uint32_t, std::unique_ptr<GPUPass>> passes_[GPU_MAT_ENGINE_MAX][2 /*is_optimization_pass*/];

  std::mutex mutex_;

 public:
  void add(eGPUMaterialEngine engine,
           GPUCodegen &codegen,
           bool deferred_compilation,
           bool is_optimization_pass)
  {
    std::lock_guard lock(mutex_);
    passes_[engine][is_optimization_pass].add(
        codegen.hash_get(),
        std::make_unique<GPUPass>(codegen.create_info,
                                  deferred_compilation,
                                  is_optimization_pass,
                                  codegen.should_optimize_heuristic()));
  }

  GPUPass *get(eGPUMaterialEngine engine,
               size_t hash,
               bool allow_deferred,
               bool is_optimization_pass)
  {
    std::lock_guard lock(mutex_);
    std::unique_ptr<GPUPass> *pass = passes_[engine][is_optimization_pass].lookup_ptr(hash);
    if (!allow_deferred && pass && pass->get()->status == GPU_PASS_QUEUED) {
      pass->get()->finalize_compilation();
    }
    return pass ? pass->get() : nullptr;
  }
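
  /* Poll pending compilations and garbage collect unused passes. Optimization passes use a much
   * shorter collection delay, presumably because the non-optimized variant of the shader stays
   * available as a fallback. */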
  void update()
  {
    std::lock_guard lock(mutex_);
    double timestamp = BLI_time_now_seconds();

    /* Base Passes. */
    for (auto &engine_passes : passes_) {
      for (std::unique_ptr<GPUPass> &pass : engine_passes[false].values()) {
        pass->update(timestamp);
      }
      engine_passes[false].remove_if(
          [&](auto item) { return item.value->should_gc(gc_collect_rate_, timestamp); });
    }

    /* Optimization Passes. */
    for (auto &engine_passes : passes_) {
      for (std::unique_ptr<GPUPass> &pass : engine_passes[true].values()) {
        pass->update(timestamp);
      }
      engine_passes[true].remove_if([&](auto item) {
        return item.value->should_gc(optimization_gc_collect_rate_, timestamp);
      });
    }
  }

  std::mutex &get_mutex()
  {
    return mutex_;
  }
};
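
/* Global pass cache, created and destroyed with the GPU module
 * (see GPU_pass_cache_init / GPU_pass_cache_free). */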
static GPUPassCache *g_cache = nullptr;
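
/* Synchronously finalize a still-queued pass. The status is re-checked after taking the cache
 * mutex (double-checked locking) so the common already-compiled case stays lock-free. */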
void GPU_pass_ensure_its_ready(GPUPass *pass)
{
  if (pass->status == GPU_PASS_QUEUED) {
    std::lock_guard lock(g_cache->get_mutex());
    if (pass->status == GPU_PASS_QUEUED) {
      pass->finalize_compilation();
    }
  }
}

void GPU_pass_cache_init()
{
  g_cache = MEM_new<GPUPassCache>(__func__);
}

void GPU_pass_cache_update()
{
  g_cache->update();
}

void GPU_pass_cache_wait_for_all()
{
  GPU_shader_batch_wait_for_all();
  g_cache->update();
}

void GPU_pass_cache_free()
{
  MEM_SAFE_DELETE(g_cache);
}

/** \} */

/* -------------------------------------------------------------------- */
/** \name Compilation
 * \{ */

static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info)
{
  int samplers_len = 0;
  for (const ShaderCreateInfo::Resource &res : create_info->resources_get_all_()) {
    if (res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
      samplers_len++;
    }
  }

  /* Validate against the per-stage GPU limits. */
  if ((samplers_len > GPU_max_textures_frag()) || (samplers_len > GPU_max_textures_vert())) {
    return false;
  }
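
  /* The same sampler list is assumed to be bound to both the vertex and the fragment stage,
   * hence the factor of two against the combined texture limit. */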
  return (samplers_len * 2 <= GPU_max_textures());
}
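
/* Generate a pass for the given node graph, reusing a cached pass when one with identical
 * generated code already exists. The returned pass holds one reference owned by the caller,
 * to be dropped with GPU_pass_release(). */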
GPUPass *GPU_generate_pass(GPUMaterial *material,
                           GPUNodeGraph *graph,
                           const char *debug_name,
                           eGPUMaterialEngine engine,
                           bool deferred_compilation,
                           GPUCodegenCallbackFn finalize_source_cb,
                           void *thunk,
                           bool optimize_graph)
{
  gpu_node_graph_prune_unused(graph);

  /* If the optimize flag is passed in, we are generating an optimized variant of the
   * GPUMaterial's GPUPass. */
  if (optimize_graph) {
    gpu_node_graph_optimize(graph);
  }

  /* Extract attributes before compiling so the generated VBOs are ready to accept the future
   * shader. */
  gpu_node_graph_finalize_uniform_attrs(graph);

  GPUCodegen codegen(material, graph, debug_name);
  codegen.generate_graphs();
  codegen.generate_cryptomatte();

  GPUPass *pass = nullptr;

  if (!optimize_graph) {
    /* The optimized version of the shader should not re-generate a UBO.
     * The UBO will not be used for this variant. */
    codegen.generate_uniform_buffer();
  }

  /* Cache lookup: Reuse shaders already compiled. */
  pass = g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
  if (pass) {
    pass->refcount++;
    return pass;
  }

  /* The shader is not compiled, continue generating the shader strings. */
  codegen.generate_attribs();
  codegen.generate_resources();

  /* Make the engine add its own code and implement the generated functions. */
  finalize_source_cb(thunk, material, &codegen.output);

  codegen.create_info->finalize();
  g_cache->add(engine, codegen, deferred_compilation, optimize_graph);
  codegen.create_info = nullptr;

  return g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
}
/** \} */