Refactor: GPU: GPUMaterial & GPUPass compilation

Cleanup and simplification of GPUMaterial and GPUPass compilation.
See #133674 for details/goals.

- Remove the `draw_manager_shader` thread.
  Deferred compilation is now handled by the `gpu::ShaderCompiler`
  through the batch compilation API.
  Batch management is handled by the `GPUPassCache`.
- Simplify `GPUMaterial` status tracking so it just queries the
  `GPUPass` status.
- Split the `GPUPass` and the `GPUCodegen` code.
- Replace the (broken) `GPU_material_recalc_flag_get` with the new
  `GPU_pass_compilation_timestamp` (see the sketch after this list).
- Add `GPU_pass_cache_wait_for_all` and `GPU_shader_batch_wait_for_all`,
  and remove the busy waits from EEVEE.
- Remove many unused functions, properties, includes...
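
For context, a minimal sketch (not part of the diff) of the new update-detection and wait pattern, distilled from the EEVEE hunks further down. The `GPU_*` functions and the `gpu_pass_*_update_` members are the ones appearing in this commit; the wrapper functions are illustrative only and the surrounding sync logic is omitted.

#include "GPU_material.hh"
#include "GPU_pass.hh"

/* Mirrors the members added to MaterialModule in this commit. */
static uint64_t gpu_pass_last_update_ = 0;
static uint64_t gpu_pass_next_update_ = 0;

static void begin_sync()
{
  /* Snapshot the global pass compilation counter once per sync. Any pass
   * compiled before this point has a timestamp <= gpu_pass_last_update_. */
  uint64_t next_update = GPU_pass_global_compilation_count();
  gpu_pass_last_update_ = gpu_pass_next_update_;
  gpu_pass_next_update_ = next_update;
}

static bool material_pass_updated(GPUMaterial *gpumat)
{
  /* Unlike the old recalc flag, reading the timestamp consumes no state, so a
   * GPUMaterial shared across viewports reports the update to every viewport. */
  return GPU_material_compilation_timestamp(gpumat) > gpu_pass_last_update_;
}

static void wait_for_deferred_shaders()
{
  /* Blocks until the GPUPassCache has finished all queued batch compilations,
   * replacing the previous BLI_time_sleep_ms(50) busy-wait loops in EEVEE. */
  GPU_pass_cache_wait_for_all();
}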

Pull Request: https://projects.blender.org/blender/blender/pulls/135637
Miguel Pozo
2025-05-22 17:53:22 +02:00
parent 9222daacb7
commit e6638d6e5e
29 changed files with 1283 additions and 2111 deletions


@@ -44,8 +44,6 @@ ShaderOperation::ShaderOperation(Context &context,
{
material_ = GPU_material_from_callbacks(
GPU_MAT_COMPOSITOR, &construct_material, &generate_code, this);
GPU_material_status_set(material_, GPU_MAT_QUEUED);
GPU_material_compile(material_);
}
ShaderOperation::~ShaderOperation()


@@ -75,7 +75,6 @@ set(SRC
intern/draw_gpu_context.cc
intern/draw_hair.cc
intern/draw_manager.cc
intern/draw_manager_shader.cc
intern/draw_manager_text.cc
intern/draw_pbvh.cc
intern/draw_pointcloud.cc


@@ -180,9 +180,6 @@ void DRW_system_gpu_render_context_disable(void *re_system_gpu_context);
void DRW_blender_gpu_render_context_enable(void *re_gpu_context);
void DRW_blender_gpu_render_context_disable(void *re_gpu_context);
void DRW_deferred_shader_remove(GPUMaterial *mat);
void DRW_deferred_shader_optimize_remove(GPUMaterial *mat);
DRWData *DRW_viewport_data_create();
void DRW_viewport_data_free(DRWData *drw_data);


@@ -25,6 +25,7 @@
#include "ED_screen.hh"
#include "ED_view3d.hh"
#include "GPU_context.hh"
#include "GPU_pass.hh"
#include "IMB_imbuf_types.hh"
#include "RE_pipeline.h"
@@ -484,9 +485,12 @@ void Instance::render_sample()
if (!is_viewport() && sampling.do_render_sync()) {
render_sync();
while (materials.queued_shaders_count > 0) {
/* Leave some time for shaders to compile. */
BLI_time_sleep_ms(50);
/** WORKAROUND: Re-sync to check if all shaders are already compiled. */
GPU_pass_cache_wait_for_all();
/** WORKAROUND: Re-sync now that all shaders are compiled. */
/* This may need to happen more than once, since actual materials may require more passes
* (eg. volume ones) than the fallback material used for queued passes. */
/* TODO(@pragma37): There seems to be an issue where multiple `step_object_sync` calls on the
* same step can cause mismatching `has_motion` values between sync. */
render_sync();
}
}
@@ -824,10 +828,13 @@ void Instance::light_bake_irradiance(
custom_pipeline_wrapper([&]() {
this->render_sync();
while (materials.queued_shaders_count > 0) {
/* Leave some time for shaders to compile. */
BLI_time_sleep_ms(50);
/** WORKAROUND: Re-sync to check if all shaders are already compiled. */
this->render_sync();
GPU_pass_cache_wait_for_all();
/** WORKAROUND: Re-sync now that all shaders are compiled. */
/* This may need to happen more than once, since actual materials may require more passes
* (eg. volume ones) than the fallback material used for queued passes. */
/* TODO(@pragma37): There seems to be an issue where multiple `step_object_sync` calls on the
* same step can cause mismatching `has_motion` values between sync. */
render_sync();
}
/* Sampling module needs to be initialized to computing lighting. */
sampling.init(probe);


@@ -312,7 +312,7 @@ void LookdevModule::sync_pass(PassSimple &pass,
const DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_CULL_BACK;
GPUMaterial *gpumat = inst_.shaders.material_shader_get(
mat, mat->nodetree, MAT_PIPE_FORWARD, MAT_GEOM_MESH, MAT_PROBE_NONE);
mat, mat->nodetree, MAT_PIPE_FORWARD, MAT_GEOM_MESH, false, inst_.materials.default_surface);
pass.state_set(state);
pass.material_set(*inst_.manager, gpumat);
pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx);


@@ -119,6 +119,12 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst)
bke::node_set_active(*ntree, *output);
}
{
default_surface = reinterpret_cast<::Material *>(BKE_id_copy_ex(
nullptr, &BKE_material_default_surface()->id, nullptr, LIB_ID_COPY_LOCALIZE));
default_volume = reinterpret_cast<::Material *>(BKE_id_copy_ex(
nullptr, &BKE_material_default_volume()->id, nullptr, LIB_ID_COPY_LOCALIZE));
}
{
error_mat_ = BKE_id_new_nomain<::Material>("EEVEE default error");
bNodeTree *ntree = bke::node_tree_add_tree_embedded(
@@ -146,6 +152,8 @@ MaterialModule::~MaterialModule()
{
BKE_id_free(nullptr, metallic_mat);
BKE_id_free(nullptr, diffuse_mat);
BKE_id_free(nullptr, default_surface);
BKE_id_free(nullptr, default_volume);
BKE_id_free(nullptr, error_mat_);
}
@@ -154,6 +162,10 @@ void MaterialModule::begin_sync()
queued_shaders_count = 0;
queued_optimize_shaders_count = 0;
uint64_t next_update = GPU_pass_global_compilation_count();
gpu_pass_last_update_ = gpu_pass_next_update_;
gpu_pass_next_update_ = next_update;
material_map_.clear();
shader_map_.clear();
}
@@ -174,11 +186,13 @@ MaterialPass MaterialModule::material_pass_get(Object *ob,
use_deferred_compilation = false;
}
const bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_OCCUPANCY, MAT_PIPE_VOLUME_MATERIAL);
::Material *default_mat = is_volume ? default_volume : default_surface;
MaterialPass matpass = MaterialPass();
matpass.gpumat = inst_.shaders.material_shader_get(
blender_mat, ntree, pipeline_type, geometry_type, use_deferred_compilation);
blender_mat, ntree, pipeline_type, geometry_type, use_deferred_compilation, default_mat);
const bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_OCCUPANCY, MAT_PIPE_VOLUME_MATERIAL);
const bool is_forward = ELEM(pipeline_type,
MAT_PIPE_FORWARD,
MAT_PIPE_PREPASS_FORWARD,
@@ -196,12 +210,13 @@ MaterialPass MaterialModule::material_pass_get(Object *ob,
}
case GPU_MAT_QUEUED:
queued_shaders_count++;
matpass.gpumat = inst_.shaders.material_default_shader_get(pipeline_type, geometry_type);
matpass.gpumat = inst_.shaders.material_shader_get(
default_mat, default_mat->nodetree, pipeline_type, geometry_type, false, nullptr);
break;
case GPU_MAT_FAILED:
default:
matpass.gpumat = inst_.shaders.material_shader_get(
error_mat_, error_mat_->nodetree, pipeline_type, geometry_type, false);
error_mat_, error_mat_->nodetree, pipeline_type, geometry_type, false, nullptr);
break;
}
/* Returned material should be ready to be drawn. */
@@ -211,11 +226,9 @@ MaterialPass MaterialModule::material_pass_get(Object *ob,
const bool is_transparent = GPU_material_flag_get(matpass.gpumat, GPU_MATFLAG_TRANSPARENT);
if (inst_.is_viewport() && use_deferred_compilation &&
GPU_material_recalc_flag_get(matpass.gpumat))
{
/* TODO(Miguel Pozo): This is broken, it consumes the flag,
* but GPUMats can be shared across viewports. */
bool pass_updated = GPU_material_compilation_timestamp(matpass.gpumat) > gpu_pass_last_update_;
if (inst_.is_viewport() && use_deferred_compilation && pass_updated) {
inst_.sampling.reset();
const bool has_displacement = GPU_material_has_displacement_output(matpass.gpumat) &&


@@ -352,6 +352,8 @@ class MaterialModule {
public:
::Material *diffuse_mat;
::Material *metallic_mat;
::Material *default_surface;
::Material *default_volume;
int64_t queued_shaders_count = 0;
int64_t queued_optimize_shaders_count = 0;
@@ -368,6 +370,9 @@ class MaterialModule {
::Material *error_mat_;
uint64_t gpu_pass_last_update_ = 0;
uint64_t gpu_pass_next_update_ = 0;
public:
MaterialModule(Instance &inst);
~MaterialModule();


@@ -12,6 +12,7 @@
#include "GPU_capabilities.hh"
#include "BKE_material.hh"
#include "DNA_world_types.h"
#include "gpu_shader_create_info.hh"
@@ -916,17 +917,25 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut
}
}
struct CallbackThunk {
ShaderModule *shader_module;
::Material *default_mat;
};
/* WATCH: This can be called from another thread! Needs to not touch the shader module in any
* thread unsafe manner. */
static void codegen_callback(void *thunk, GPUMaterial *mat, GPUCodegenOutput *codegen)
static void codegen_callback(void *void_thunk, GPUMaterial *mat, GPUCodegenOutput *codegen)
{
reinterpret_cast<ShaderModule *>(thunk)->material_create_info_amend(mat, codegen);
CallbackThunk *thunk = static_cast<CallbackThunk *>(void_thunk);
thunk->shader_module->material_create_info_amend(mat, codegen);
}
static GPUPass *pass_replacement_cb(void *thunk, GPUMaterial *mat)
static GPUPass *pass_replacement_cb(void *void_thunk, GPUMaterial *mat)
{
using namespace blender::gpu::shader;
CallbackThunk *thunk = static_cast<CallbackThunk *>(void_thunk);
const ::Material *blender_mat = GPU_material_get_material(mat);
uint64_t shader_uuid = GPU_material_uuid_get(mat);
@@ -963,100 +972,66 @@ static GPUPass *pass_replacement_cb(void *thunk, GPUMaterial *mat)
(is_prepass && (!has_vertex_displacement && !has_transparency &&
!has_raytraced_transmission));
if (can_use_default) {
GPUMaterial *mat = reinterpret_cast<ShaderModule *>(thunk)->material_default_shader_get(
pipeline_type, geometry_type);
GPUMaterial *mat = thunk->shader_module->material_shader_get(thunk->default_mat,
thunk->default_mat->nodetree,
pipeline_type,
geometry_type,
false,
nullptr);
return GPU_material_get_pass(mat);
}
return nullptr;
}
GPUMaterial *ShaderModule::material_default_shader_get(eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type)
{
bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY);
::Material *blender_mat = (is_volume) ? BKE_material_default_volume() :
BKE_material_default_surface();
return material_shader_get(
blender_mat, blender_mat->nodetree, pipeline_type, geometry_type, false);
}
GPUMaterial *ShaderModule::material_shader_get(::Material *blender_mat,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
bool deferred_compilation)
bool deferred_compilation,
::Material *default_mat)
{
bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY);
eMaterialDisplacement displacement_type = to_displacement_type(blender_mat->displacement_method);
eMaterialThickness thickness_type = to_thickness_type(blender_mat->thickness_mode);
uint64_t shader_uuid = shader_uuid_from_material_type(
pipeline_type, geometry_type, displacement_type, thickness_type, blender_mat->blend_flag);
bool is_default_material = ELEM(
blender_mat, BKE_material_default_surface(), BKE_material_default_volume());
bool is_default_material = default_mat == nullptr;
BLI_assert(blender_mat != default_mat);
GPUMaterial *mat = DRW_shader_from_material(blender_mat,
nodetree,
GPU_MAT_EEVEE,
shader_uuid,
is_volume,
deferred_compilation,
codegen_callback,
this,
is_default_material ? nullptr : pass_replacement_cb);
CallbackThunk thunk = {this, default_mat};
return mat;
return GPU_material_from_nodetree(blender_mat,
nodetree,
&blender_mat->gpumaterial,
blender_mat->id.name,
GPU_MAT_EEVEE,
shader_uuid,
deferred_compilation,
codegen_callback,
&thunk,
is_default_material ? nullptr : pass_replacement_cb);
}
GPUMaterial *ShaderModule::world_shader_get(::World *blender_world,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type)
eMaterialPipeline pipeline_type,
bool deferred_compilation)
{
bool is_volume = (pipeline_type == MAT_PIPE_VOLUME_MATERIAL);
bool defer_compilation = is_volume;
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, MAT_GEOM_WORLD);
return DRW_shader_from_world(blender_world,
nodetree,
GPU_MAT_EEVEE,
shader_uuid,
is_volume,
defer_compilation,
codegen_callback,
this);
}
CallbackThunk thunk = {this, nullptr};
GPUMaterial *ShaderModule::material_shader_get(const char *name,
ListBase &materials,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type)
{
uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type);
bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY);
GPUMaterial *gpumat = GPU_material_from_nodetree(nullptr,
nullptr,
nodetree,
&materials,
name,
GPU_MAT_EEVEE,
shader_uuid,
is_volume,
false,
codegen_callback,
this);
GPU_material_status_set(gpumat, GPU_MAT_CREATED);
GPU_material_compile(gpumat);
/* Queue deferred material optimization. */
DRW_shader_queue_optimize_material(gpumat);
return gpumat;
return GPU_material_from_nodetree(nullptr,
nodetree,
&blender_world->gpumaterial,
blender_world->id.name,
GPU_MAT_EEVEE,
shader_uuid,
deferred_compilation,
codegen_callback,
&thunk);
}
/** \} */


@@ -235,26 +235,16 @@ class ShaderModule {
bool use_lightprobe_eval);
GPUShader *static_shader_get(eShaderType shader_type);
GPUMaterial *material_default_shader_get(eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type);
GPUMaterial *material_shader_get(::Material *blender_mat,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type,
bool deferred_compilation);
bool deferred_compilation,
::Material *default_mat);
GPUMaterial *world_shader_get(::World *blender_world,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type);
/**
* Variation to compile a material only with a `nodetree`. Caller needs to maintain the list of
* materials and call GPU_material_free on it to update the material.
*/
GPUMaterial *material_shader_get(const char *name,
ListBase &materials,
bNodeTree *nodetree,
eMaterialPipeline pipeline_type,
eMaterialGeometry geometry_type);
eMaterialPipeline pipeline_type,
bool deferred_compilation);
void material_create_info_amend(GPUMaterial *mat, GPUCodegenOutput *codegen);


@@ -148,7 +148,7 @@ void World::sync()
inst_.sampling.reset();
}
GPUMaterial *gpumat = inst_.shaders.world_shader_get(bl_world, ntree, MAT_PIPE_DEFERRED);
GPUMaterial *gpumat = inst_.shaders.world_shader_get(bl_world, ntree, MAT_PIPE_DEFERRED, false);
inst_.manager->register_layer_attributes(gpumat);
@@ -169,7 +169,8 @@ void World::sync_volume(const WorldHandle &world_handle)
/* Only the scene world nodetree can have volume shader. */
if (world && world->nodetree && world->use_nodes) {
gpumat = inst_.shaders.world_shader_get(world, world->nodetree, MAT_PIPE_VOLUME_MATERIAL);
gpumat = inst_.shaders.world_shader_get(
world, world->nodetree, MAT_PIPE_VOLUME_MATERIAL, !inst_.is_image_render);
}
bool had_volume = has_volume_;


@@ -123,31 +123,6 @@ struct DrawEngine {
};
};
/* Shaders */
/** IMPORTANT: Modify the currently bound context. */
void DRW_shader_init();
void DRW_shader_exit();
GPUMaterial *DRW_shader_from_world(World *wo,
bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
GPUCodegenCallbackFn callback,
void *thunk);
GPUMaterial *DRW_shader_from_material(
Material *ma,
bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb = nullptr);
void DRW_shader_queue_optimize_material(GPUMaterial *mat);
/* Viewport. */
/**


@@ -150,17 +150,9 @@ void DRW_gpu_context_create()
viewport_context = MEM_new<ContextShared>(__func__);
preview_context = MEM_new<ContextShared>(__func__);
{
/** IMPORTANT: Very delicate context handling. Changing the order of context creation makes it
* crash in background mode on windows (see #136270). */
/* Setup compilation context. Called first as it changes the active GPUContext. */
DRW_shader_init();
/* Some part of the code assumes no context is left bound. */
GPU_context_active_set(nullptr);
WM_system_gpu_context_release(preview_context->system_gpu_context_);
}
/* Some part of the code assumes no context is left bound. */
GPU_context_active_set(nullptr);
WM_system_gpu_context_release(preview_context->system_gpu_context_);
/* Activate the window's context if any. */
wm_window_reset_drawable();
@@ -172,7 +164,6 @@ void DRW_gpu_context_destroy()
if (viewport_context == nullptr) {
return;
}
DRW_shader_exit();
DRW_submission_mutex_exit();
MEM_SAFE_DELETE(viewport_context);


@@ -1,435 +0,0 @@
/* SPDX-FileCopyrightText: 2016 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup draw
*/
#include "DNA_material_types.h"
#include "DNA_world_types.h"
#include "BLI_threads.h"
#include "BLI_time.h"
#include "DEG_depsgraph_query.hh"
#include "GPU_capabilities.hh"
#include "GPU_material.hh"
#include "GPU_state.hh"
#include "WM_api.hh"
#include "draw_context_private.hh"
#include <atomic>
#include <condition_variable>
#include <mutex>
extern "C" char datatoc_gpu_shader_depth_only_frag_glsl[];
extern "C" char datatoc_common_fullscreen_vert_glsl[];
using namespace blender;
/* -------------------------------------------------------------------- */
/** \name Deferred Compilation (DRW_deferred)
*
* Since compiling shader can take a long time, we do it in a non blocking
* manner in another thread.
*
* \{ */
struct DRWShaderCompiler {
/** Default compilation queue. */
Vector<GPUMaterial *> queue;
/** Optimization queue. */
Vector<GPUMaterial *> optimize_queue;
std::mutex queue_mutex;
std::condition_variable queue_cv;
void *system_gpu_context;
GPUContext *blender_gpu_context;
std::atomic<bool> stop;
};
/** NOTE: While the `BLI_threads` API requires a List,
* we only create a single thread at application startup and delete it at exit. */
static ListBase &compilation_threadpool()
{
static ListBase compilation_threadpool_ = {};
return compilation_threadpool_;
}
static DRWShaderCompiler &compiler_data()
{
static DRWShaderCompiler compiler_data_ = {};
return compiler_data_;
}
static void *drw_deferred_shader_compilation_exec(void * /*unused*/)
{
using namespace blender;
void *system_gpu_context = compiler_data().system_gpu_context;
GPUContext *blender_gpu_context = compiler_data().blender_gpu_context;
BLI_assert(system_gpu_context != nullptr);
BLI_assert(blender_gpu_context != nullptr);
GPU_render_begin();
WM_system_gpu_context_activate(system_gpu_context);
GPU_context_active_set(blender_gpu_context);
const bool use_parallel_compilation = GPU_use_parallel_compilation();
Vector<GPUMaterial *> async_mats;
while (true) {
if (compiler_data().stop) {
break;
}
compiler_data().queue_mutex.lock();
/* Pop last because it will be less likely to lock the main thread
* if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
GPUMaterial *mat = compiler_data().queue.is_empty() ? nullptr :
compiler_data().queue.pop_last();
if (mat) {
/* Avoid another thread freeing the material mid compilation. */
GPU_material_acquire(mat);
}
compiler_data().queue_mutex.unlock();
if (mat) {
/* We have a new material that must be compiled,
* we either compile it directly or add it to the async compilation list. */
if (use_parallel_compilation) {
GPU_material_async_compile(mat);
async_mats.append(mat);
}
else {
GPU_material_compile(mat);
GPU_material_release(mat);
}
}
else if (!async_mats.is_empty()) {
/* (only if use_parallel_compilation == true)
* Keep querying the requested materials until all of them are ready. */
async_mats.remove_if([](GPUMaterial *mat) {
if (GPU_material_async_try_finalize(mat)) {
GPU_material_release(mat);
return true;
}
return false;
});
}
else {
/* Check for Material Optimization job once there are no more
* shaders to compile. */
compiler_data().queue_mutex.lock();
/* Pop last because it will be less likely to lock the main thread
* if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */
GPUMaterial *optimize_mat = compiler_data().optimize_queue.is_empty() ?
nullptr :
compiler_data().optimize_queue.pop_last();
if (optimize_mat) {
/* Avoid another thread freeing the material during optimization. */
GPU_material_acquire(optimize_mat);
}
compiler_data().queue_mutex.unlock();
if (optimize_mat) {
/* Compile optimized material shader. */
GPU_material_optimize(optimize_mat);
GPU_material_release(optimize_mat);
}
else {
/* No more materials to optimize, or shaders to compile. */
std::unique_lock lock(compiler_data().queue_mutex);
compiler_data().queue_cv.wait(lock);
}
}
if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) {
GPU_flush();
}
}
/* We have to wait until all the requested batches are ready,
* even if compiler_data().stop is true. */
while (!async_mats.is_empty()) {
async_mats.remove_if([](GPUMaterial *mat) {
if (GPU_material_async_try_finalize(mat)) {
GPU_material_release(mat);
return true;
}
return false;
});
}
GPU_context_active_set(nullptr);
WM_system_gpu_context_release(system_gpu_context);
GPU_render_end();
return nullptr;
}
void DRW_shader_init()
{
if (GPU_use_main_context_workaround()) {
/* Deferred compilation is not supported. */
return;
}
static bool initialized = false;
if (initialized) {
BLI_assert_unreachable();
return;
}
initialized = true;
compiler_data().stop = false;
compiler_data().system_gpu_context = WM_system_gpu_context_create();
compiler_data().blender_gpu_context = GPU_context_create(nullptr,
compiler_data().system_gpu_context);
/* Some part of the code assumes no context is left bound. */
GPU_context_active_set(nullptr);
WM_system_gpu_context_release(compiler_data().system_gpu_context);
BLI_threadpool_init(&compilation_threadpool(), drw_deferred_shader_compilation_exec, 1);
BLI_threadpool_insert(&compilation_threadpool(), nullptr);
}
void DRW_shader_exit()
{
if (GPU_use_main_context_workaround()) {
/* Deferred compilation is not supported. */
return;
}
compiler_data().stop = true;
compiler_data().queue_cv.notify_one();
BLI_threadpool_end(&compilation_threadpool());
/* Revert the queued state for the materials that has not been compiled.
* Note that this is not strictly needed since this function is called at program exit. */
{
std::scoped_lock queue_lock(compiler_data().queue_mutex);
while (!compiler_data().queue.is_empty()) {
GPU_material_status_set(compiler_data().queue.pop_last(), GPU_MAT_CREATED);
}
while (!compiler_data().optimize_queue.is_empty()) {
GPU_material_optimization_status_set(compiler_data().optimize_queue.pop_last(),
GPU_MAT_OPTIMIZATION_READY);
}
}
WM_system_gpu_context_activate(compiler_data().system_gpu_context);
GPU_context_active_set(compiler_data().blender_gpu_context);
GPU_context_discard(compiler_data().blender_gpu_context);
WM_system_gpu_context_dispose(compiler_data().system_gpu_context);
}
/**
* Append either shader compilation or optimization job to deferred queue.
* We keep two separate queue's to ensure core compilations always complete before optimization.
*/
static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job)
{
std::scoped_lock queue_lock(compiler_data().queue_mutex);
/* Add to either compilation or optimization queue. */
if (is_optimization_job) {
BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED);
compiler_data().optimize_queue.append(mat);
}
else {
GPU_material_status_set(mat, GPU_MAT_QUEUED);
compiler_data().queue.append(mat);
}
compiler_data().queue_cv.notify_one();
}
static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred)
{
if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) {
return;
}
if (GPU_use_main_context_workaround()) {
deferred = false;
}
if (!deferred) {
DRW_deferred_shader_remove(mat);
/* Shaders could already be compiling. Have to wait for compilation to finish. */
while (GPU_material_status(mat) == GPU_MAT_QUEUED) {
BLI_time_sleep_ms(20);
}
if (GPU_material_status(mat) == GPU_MAT_CREATED) {
GPU_material_compile(mat);
}
return;
}
/* Don't add material to the queue twice. */
if (GPU_material_status(mat) == GPU_MAT_QUEUED) {
return;
}
/* Add deferred shader compilation to queue. */
drw_deferred_queue_append(mat, false);
}
void DRW_deferred_shader_remove(GPUMaterial *mat)
{
if (GPU_use_main_context_workaround()) {
/* Deferred compilation is not supported. */
return;
}
std::scoped_lock queue_lock(compiler_data().queue_mutex);
/* Search for compilation job in queue. */
if (compiler_data().queue.contains(mat)) {
compiler_data().queue.remove_first_occurrence_and_reorder(mat);
GPU_material_status_set(mat, GPU_MAT_CREATED);
}
/* Search for optimization job in queue. */
if (compiler_data().optimize_queue.contains(mat)) {
compiler_data().optimize_queue.remove_first_occurrence_and_reorder(mat);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
}
}
void DRW_deferred_shader_optimize_remove(GPUMaterial *mat)
{
if (GPU_use_main_context_workaround()) {
/* Deferred compilation is not supported. */
return;
}
std::scoped_lock queue_lock(compiler_data().queue_mutex);
/* Search for optimization job in queue. */
if (compiler_data().optimize_queue.contains(mat)) {
compiler_data().optimize_queue.remove_first_occurrence_and_reorder(mat);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \{ */
GPUMaterial *DRW_shader_from_world(World *wo,
bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
GPUCodegenCallbackFn callback,
void *thunk)
{
Scene *scene = DEG_get_original(drw_get().scene);
GPUMaterial *mat = GPU_material_from_nodetree(scene,
nullptr,
ntree,
&wo->gpumaterial,
wo->id.name,
engine,
shader_id,
is_volume_shader,
false,
callback,
thunk);
if (DRW_context_get()->is_image_render()) {
/* Do not deferred if doing render. */
deferred = false;
}
drw_deferred_shader_add(mat, deferred);
DRW_shader_queue_optimize_material(mat);
return mat;
}
GPUMaterial *DRW_shader_from_material(Material *ma,
bNodeTree *ntree,
eGPUMaterialEngine engine,
const uint64_t shader_id,
const bool is_volume_shader,
bool deferred,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb)
{
Scene *scene = DEG_get_original(drw_get().scene);
GPUMaterial *mat = GPU_material_from_nodetree(scene,
ma,
ntree,
&ma->gpumaterial,
ma->id.name,
engine,
shader_id,
is_volume_shader,
false,
callback,
thunk,
pass_replacement_cb);
drw_deferred_shader_add(mat, deferred);
DRW_shader_queue_optimize_material(mat);
return mat;
}
void DRW_shader_queue_optimize_material(GPUMaterial *mat)
{
/* Do not perform deferred optimization if performing render.
* De-queue any queued optimization jobs. */
if (DRW_context_get()->is_image_render()) {
if (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
/* Remove from pending optimization job queue. */
DRW_deferred_shader_optimize_remove(mat);
/* If optimization job had already started, wait for it to complete. */
while (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) {
BLI_time_sleep_ms(20);
}
}
return;
}
/* We do not need to perform optimization on the material if it is already compiled or in the
* optimization queue. If optimization is not required, the status will be flagged as
* `GPU_MAT_OPTIMIZATION_SKIP`.
* We can also skip cases which have already been queued up. */
if (ELEM(GPU_material_optimization_status(mat),
GPU_MAT_OPTIMIZATION_SKIP,
GPU_MAT_OPTIMIZATION_SUCCESS,
GPU_MAT_OPTIMIZATION_QUEUED))
{
return;
}
/* Only queue optimization once the original shader has been successfully compiled. */
if (GPU_material_status(mat) != GPU_MAT_SUCCESS) {
return;
}
/* Defer optimization until sufficient time has passed beyond creation. This avoids excessive
* recompilation for shaders which are being actively modified. */
if (!GPU_material_optimization_ready(mat)) {
return;
}
/* Add deferred shader compilation to queue. */
drw_deferred_queue_append(mat, true);
}
/** \} */


@@ -50,6 +50,7 @@
#include "GPU_debug.hh"
#include "GPU_index_buffer.hh"
#include "GPU_material.hh"
#include "GPU_pass.hh"
#include "DRW_gpu_wrapper.hh"
@@ -59,8 +60,6 @@
#include "draw_shader_shared.hh"
#include "draw_state.hh"
#include "intern/gpu_codegen.hh"
#include <cstdint>
#include <sstream>


@@ -64,7 +64,6 @@
#include "GPU_framebuffer.hh"
#include "GPU_immediate.hh"
#include "GPU_immediate_util.hh"
#include "GPU_material.hh"
#include "GPU_matrix.hh"
#include "GPU_state.hh"
#include "GPU_viewport.hh"
@@ -1683,7 +1682,6 @@ void view3d_main_region_draw(const bContext *C, ARegion *region)
DRW_cache_free_old_subdiv();
DRW_cache_free_old_batches(bmain);
BKE_image_free_old_gputextures(bmain);
GPU_pass_cache_garbage_collect();
/* No depth test for drawing action zones afterwards. */
GPU_depth_test(GPU_DEPTH_NONE);


@@ -77,6 +77,7 @@ set(SRC
intern/gpu_material.cc
intern/gpu_matrix.cc
intern/gpu_node_graph.cc
intern/gpu_pass.cc
intern/gpu_platform.cc
intern/gpu_query.cc
intern/gpu_select.cc
@@ -118,6 +119,7 @@ set(SRC
GPU_init_exit.hh
GPU_material.hh
GPU_matrix.hh
GPU_pass.hh
GPU_platform.hh
GPU_platform_backend_enum.h
GPU_primitive.hh


@@ -33,31 +33,28 @@ struct Scene;
struct bNode;
struct bNodeTree;
/* Functions to create GPU Materials nodes. */
/**
* High level functions to create and use GPU materials.
*/
enum eGPUType {
/* Keep in sync with GPU_DATATYPE_STR */
/* The value indicates the number of elements in each type */
GPU_NONE = 0,
GPU_FLOAT = 1,
GPU_VEC2 = 2,
GPU_VEC3 = 3,
GPU_VEC4 = 4,
GPU_MAT3 = 9,
GPU_MAT4 = 16,
GPU_MAX_CONSTANT_DATA = GPU_MAT4,
enum eGPUMaterialEngine {
GPU_MAT_EEVEE,
GPU_MAT_COMPOSITOR,
GPU_MAT_ENGINE_MAX,
};
/* Values not in GPU_DATATYPE_STR */
GPU_TEX1D_ARRAY = 1001,
GPU_TEX2D = 1002,
GPU_TEX2D_ARRAY = 1003,
GPU_TEX3D = 1004,
enum eGPUMaterialStatus {
GPU_MAT_FAILED = 0,
GPU_MAT_QUEUED,
GPU_MAT_SUCCESS,
};
/* GLSL Struct types */
GPU_CLOSURE = 1007,
/* Opengl Attributes */
GPU_ATTR = 3001,
/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not
* plan to perform optimization on a given material. */
enum eGPUMaterialOptimizationStatus {
GPU_MAT_OPTIMIZATION_SKIP = 0,
GPU_MAT_OPTIMIZATION_QUEUED,
GPU_MAT_OPTIMIZATION_SUCCESS,
};
enum eGPUMaterialFlag {
@@ -84,12 +81,193 @@ enum eGPUMaterialFlag {
/* Tells the render engine the material was just compiled or updated. */
GPU_MATFLAG_UPDATED = (1 << 29),
};
ENUM_OPERATORS(eGPUMaterialFlag, GPU_MATFLAG_UPDATED);
/* HACK(fclem) Tells the environment texture node to not bail out if empty. */
GPU_MATFLAG_LOOKDEV_HACK = (1 << 30),
using GPUCodegenCallbackFn = void (*)(void *thunk,
GPUMaterial *mat,
struct GPUCodegenOutput *codegen);
/**
* Should return an already compiled pass if it's functionally equivalent to the one being
* compiled.
*/
using GPUMaterialPassReplacementCallbackFn = GPUPass *(*)(void *thunk, GPUMaterial *mat);
/** WARNING: gpumaterials thread safety must be ensured by the caller. */
GPUMaterial *GPU_material_from_nodetree(
Material *ma,
bNodeTree *ntree,
ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool deferred_compilation,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb = nullptr);
/* A callback passed to GPU_material_from_callbacks to construct the material graph by adding and
* linking the necessary GPU material nodes. */
using ConstructGPUMaterialFn = void (*)(void *thunk, GPUMaterial *material);
/* Construct a GPU material from a set of callbacks. See the callback types for more information.
* The given thunk will be passed as the first parameter of each callback. */
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk);
void GPU_material_free_single(GPUMaterial *material);
void GPU_material_free(ListBase *gpumaterial);
void GPU_materials_free(Main *bmain);
GPUPass *GPU_material_get_pass(GPUMaterial *material);
/** Return the most optimal shader configuration for the given material. */
GPUShader *GPU_material_get_shader(GPUMaterial *material);
const char *GPU_material_get_name(GPUMaterial *material);
/**
* Return can be null if it's a world material.
*/
Material *GPU_material_get_material(GPUMaterial *material);
/**
* Return true if the material compilation has not yet begin or begin.
*/
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat);
/**
* Return status for asynchronous optimization jobs.
*/
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat);
uint64_t GPU_material_compilation_timestamp(GPUMaterial *mat);
GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
/**
* Create dynamic UBO from parameters
*
* \param inputs: Items are #LinkData, data is #GPUInput (`BLI_genericNodeN(GPUInput)`).
*/
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs);
bool GPU_material_has_surface_output(GPUMaterial *mat);
bool GPU_material_has_volume_output(GPUMaterial *mat);
bool GPU_material_has_displacement_output(GPUMaterial *mat);
bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag);
uint64_t GPU_material_uuid_get(GPUMaterial *mat);
struct GPULayerAttr {
GPULayerAttr *next, *prev;
/* Meaningful part of the attribute set key. */
char name[256]; /* Multiple MAX_CUSTOMDATA_LAYER_NAME */
/** Hash of name[68]. */
uint32_t hash_code;
/* Helper fields used by code generation. */
int users;
};
ENUM_OPERATORS(eGPUMaterialFlag, GPU_MATFLAG_LOOKDEV_HACK);
const ListBase *GPU_material_layer_attributes(const GPUMaterial *material);
/* Requested Material Attributes and Textures */
enum eGPUType {
/* Keep in sync with GPU_DATATYPE_STR */
/* The value indicates the number of elements in each type */
GPU_NONE = 0,
GPU_FLOAT = 1,
GPU_VEC2 = 2,
GPU_VEC3 = 3,
GPU_VEC4 = 4,
GPU_MAT3 = 9,
GPU_MAT4 = 16,
GPU_MAX_CONSTANT_DATA = GPU_MAT4,
/* Values not in GPU_DATATYPE_STR */
GPU_TEX1D_ARRAY = 1001,
GPU_TEX2D = 1002,
GPU_TEX2D_ARRAY = 1003,
GPU_TEX3D = 1004,
/* GLSL Struct types */
GPU_CLOSURE = 1007,
/* Opengl Attributes */
GPU_ATTR = 3001,
};
enum eGPUDefaultValue {
GPU_DEFAULT_0 = 0,
GPU_DEFAULT_1,
};
struct GPUMaterialAttribute {
GPUMaterialAttribute *next, *prev;
int type; /* eCustomDataType */
char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */
char input_name[12 + 1]; /* GPU_MAX_SAFE_ATTR_NAME + 1 */
eGPUType gputype;
eGPUDefaultValue default_value; /* Only for volumes attributes. */
int id;
int users;
/**
* If true, the corresponding attribute is the specified default color attribute on the mesh,
* if it exists. In that case the type and name data can vary per geometry, so it will not be
* valid here.
*/
bool is_default_color;
/**
* If true, the attribute is the length of hair particles and curves.
*/
bool is_hair_length;
};
struct GPUMaterialTexture {
GPUMaterialTexture *next, *prev;
Image *ima;
ImageUser iuser;
bool iuser_available;
GPUTexture **colorband;
GPUTexture **sky;
char sampler_name[32]; /* Name of sampler in GLSL. */
char tiled_mapping_name[32]; /* Name of tile mapping sampler in GLSL. */
int users;
GPUSamplerState sampler_state;
};
ListBase GPU_material_attributes(const GPUMaterial *material);
ListBase GPU_material_textures(GPUMaterial *material);
struct GPUUniformAttr {
GPUUniformAttr *next, *prev;
/* Meaningful part of the attribute set key. */
char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */
/** Hash of name[68] + use_dupli. */
uint32_t hash_code;
bool use_dupli;
/* Helper fields used by code generation. */
short id;
int users;
};
struct GPUUniformAttrList {
ListBase list; /* GPUUniformAttr */
/* List length and hash code precomputed for fast lookup and comparison. */
unsigned int count, hash_code;
};
const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material);
/* Functions to create GPU Materials nodes. */
/* TODO: Move to its own header. */
struct GPUNodeStack {
eGPUType type;
@@ -101,27 +279,6 @@ struct GPUNodeStack {
bool end;
};
enum eGPUMaterialStatus {
GPU_MAT_FAILED = 0,
GPU_MAT_CREATED,
GPU_MAT_QUEUED,
GPU_MAT_SUCCESS,
};
/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not
* plan to perform optimization on a given material. */
enum eGPUMaterialOptimizationStatus {
GPU_MAT_OPTIMIZATION_SKIP = 0,
GPU_MAT_OPTIMIZATION_READY,
GPU_MAT_OPTIMIZATION_QUEUED,
GPU_MAT_OPTIMIZATION_SUCCESS,
};
enum eGPUDefaultValue {
GPU_DEFAULT_0 = 0,
GPU_DEFAULT_1,
};
struct GPUCodegenOutput {
std::string attr_load;
/* Node-tree functions calls. */
@@ -135,13 +292,6 @@ struct GPUCodegenOutput {
GPUShaderCreateInfo *create_info;
};
using GPUCodegenCallbackFn = void (*)(void *thunk, GPUMaterial *mat, GPUCodegenOutput *codegen);
/**
* Should return an already compiled pass if it's functionally equivalent to the one being
* compiled.
*/
using GPUMaterialPassReplacementCallbackFn = GPUPass *(*)(void *thunk, GPUMaterial *mat);
GPUNodeLink *GPU_constant(const float *num);
GPUNodeLink *GPU_uniform(const float *num);
GPUNodeLink *GPU_attribute(GPUMaterial *mat, eCustomDataType type, const char *name);
@@ -217,203 +367,9 @@ char *GPU_material_split_sub_function(GPUMaterial *material,
eGPUType return_type,
GPUNodeLink **link);
/**
* High level functions to create and use GPU materials.
*/
enum eGPUMaterialEngine {
GPU_MAT_EEVEE_LEGACY = 0,
GPU_MAT_EEVEE,
GPU_MAT_COMPOSITOR,
};
GPUMaterial *GPU_material_from_nodetree(
Scene *scene,
Material *ma,
bNodeTree *ntree,
ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool is_volume_shader,
bool is_lookdev,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb = nullptr);
void GPU_material_compile(GPUMaterial *mat);
void GPU_material_free_single(GPUMaterial *material);
void GPU_material_free(ListBase *gpumaterial);
void GPU_material_async_compile(GPUMaterial *mat);
/** Returns true if the material have finished its compilation. */
bool GPU_material_async_try_finalize(GPUMaterial *mat);
void GPU_material_acquire(GPUMaterial *mat);
void GPU_material_release(GPUMaterial *mat);
void GPU_materials_free(Main *bmain);
Scene *GPU_material_scene(GPUMaterial *material);
GPUPass *GPU_material_get_pass(GPUMaterial *material);
/** Return the most optimal shader configuration for the given material. */
GPUShader *GPU_material_get_shader(GPUMaterial *material);
/** Return the base un-optimized shader. */
GPUShader *GPU_material_get_shader_base(GPUMaterial *material);
const char *GPU_material_get_name(GPUMaterial *material);
/**
* Material Optimization.
* \note Compiles optimal version of shader graph, populating mat->optimized_pass.
* This operation should always be deferred until existing compilations have completed.
* Default un-optimized materials will still exist for interactive material editing performance.
*/
void GPU_material_optimize(GPUMaterial *mat);
/**
* Return can be null if it's a world material.
*/
Material *GPU_material_get_material(GPUMaterial *material);
/**
* Return true if the material compilation has not yet begin or begin.
*/
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat);
void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status);
/**
* Return status for asynchronous optimization jobs.
*/
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat);
void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status);
bool GPU_material_optimization_ready(GPUMaterial *mat);
/**
* Store reference to a similar default material for asynchronous PSO cache warming.
*
* This function expects `material` to have not yet been compiled and for `default_material` to be
* ready. When compiling `material` as part of an asynchronous shader compilation job, use existing
* PSO descriptors from `default_material`'s shader to also compile PSOs for this new material
* asynchronously, rather than at runtime.
*
* The default_material `options` should match this new materials options in order
* for PSO descriptors to match those needed by the new `material`.
*
* NOTE: `default_material` must exist when `GPU_material_compile(..)` is called for
* `material`.
*
* See `GPU_shader_warm_cache(..)` for more information.
*/
void GPU_material_set_default(GPUMaterial *material, GPUMaterial *default_material);
GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material);
/**
* Create dynamic UBO from parameters
*
* \param inputs: Items are #LinkData, data is #GPUInput (`BLI_genericNodeN(GPUInput)`).
*/
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs);
bool GPU_material_has_surface_output(GPUMaterial *mat);
bool GPU_material_has_volume_output(GPUMaterial *mat);
bool GPU_material_has_displacement_output(GPUMaterial *mat);
void GPU_material_flag_set(GPUMaterial *mat, eGPUMaterialFlag flag);
bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag);
eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat);
bool GPU_material_recalc_flag_get(GPUMaterial *mat);
uint64_t GPU_material_uuid_get(GPUMaterial *mat);
void GPU_pass_cache_init();
void GPU_pass_cache_garbage_collect();
void GPU_pass_cache_free();
/* Requested Material Attributes and Textures */
struct GPUMaterialAttribute {
GPUMaterialAttribute *next, *prev;
int type; /* eCustomDataType */
char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */
char input_name[12 + 1]; /* GPU_MAX_SAFE_ATTR_NAME + 1 */
eGPUType gputype;
eGPUDefaultValue default_value; /* Only for volumes attributes. */
int id;
int users;
/**
* If true, the corresponding attribute is the specified default color attribute on the mesh,
* if it exists. In that case the type and name data can vary per geometry, so it will not be
* valid here.
*/
bool is_default_color;
/**
* If true, the attribute is the length of hair particles and curves.
*/
bool is_hair_length;
};
struct GPUMaterialTexture {
GPUMaterialTexture *next, *prev;
Image *ima;
ImageUser iuser;
bool iuser_available;
GPUTexture **colorband;
GPUTexture **sky;
char sampler_name[32]; /* Name of sampler in GLSL. */
char tiled_mapping_name[32]; /* Name of tile mapping sampler in GLSL. */
int users;
GPUSamplerState sampler_state;
};
ListBase GPU_material_attributes(const GPUMaterial *material);
ListBase GPU_material_textures(GPUMaterial *material);
struct GPUUniformAttr {
GPUUniformAttr *next, *prev;
/* Meaningful part of the attribute set key. */
char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */
/** Hash of name[68] + use_dupli. */
uint32_t hash_code;
bool use_dupli;
/* Helper fields used by code generation. */
short id;
int users;
};
struct GPUUniformAttrList {
ListBase list; /* GPUUniformAttr */
/* List length and hash code precomputed for fast lookup and comparison. */
unsigned int count, hash_code;
};
const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material);
GHash *GPU_uniform_attr_list_hash_new(const char *info);
void GPU_uniform_attr_list_copy(GPUUniformAttrList *dest, const GPUUniformAttrList *src);
void GPU_uniform_attr_list_free(GPUUniformAttrList *set);
struct GPULayerAttr {
GPULayerAttr *next, *prev;
/* Meaningful part of the attribute set key. */
char name[256]; /* Multiple MAX_CUSTOMDATA_LAYER_NAME */
/** Hash of name[68]. */
uint32_t hash_code;
/* Helper fields used by code generation. */
int users;
};
const ListBase *GPU_material_layer_attributes(const GPUMaterial *material);
/* A callback passed to GPU_material_from_callbacks to construct the material graph by adding and
* linking the necessary GPU material nodes. */
using ConstructGPUMaterialFn = void (*)(void *thunk, GPUMaterial *material);
/* Construct a GPU material from a set of callbacks. See the callback types for more information.
* The given thunk will be passed as the first parameter of each callback. */
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk);


@@ -0,0 +1,48 @@
/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*
* Generate and cache shaders generated from the intermediate node graph.
*/
#pragma once
#include "GPU_material.hh"
#include "GPU_shader.hh"
struct GPUNodeGraph;
struct GPUPass;
enum eGPUPassStatus {
GPU_PASS_FAILED = 0,
GPU_PASS_QUEUED,
GPU_PASS_SUCCESS,
};
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
const char *debug_name,
eGPUMaterialEngine engine,
bool deferred_compilation,
GPUCodegenCallbackFn finalize_source_cb,
void *thunk,
bool optimize_graph);
eGPUPassStatus GPU_pass_status(GPUPass *pass);
bool GPU_pass_should_optimize(GPUPass *pass);
void GPU_pass_ensure_its_ready(GPUPass *pass);
GPUShader *GPU_pass_shader_get(GPUPass *pass);
void GPU_pass_acquire(GPUPass *pass);
void GPU_pass_release(GPUPass *pass);
uint64_t GPU_pass_global_compilation_count();
uint64_t GPU_pass_compilation_timestamp(GPUPass *pass);
void GPU_pass_cache_init();
void GPU_pass_cache_update();
void GPU_pass_cache_wait_for_all();
void GPU_pass_cache_free();


@@ -105,6 +105,10 @@ blender::Vector<GPUShader *> GPU_shader_batch_finalize(BatchHandle &handle);
* WARNING: The handle will be invalidated by this call.
*/
void GPU_shader_batch_cancel(BatchHandle &handle);
/**
* Wait until all the requested batches have been compiled.
*/
void GPU_shader_batch_wait_for_all();
/** \} */


@@ -10,184 +10,30 @@
#include "MEM_guardedalloc.h"
#include "DNA_customdata_types.h"
#include "DNA_material_types.h"
#include "BLI_ghash.h"
#include "BLI_hash_mm2a.hh"
#include "BLI_link_utils.h"
#include "BLI_listbase.h"
#include "BLI_span.hh"
#include "BLI_string.h"
#include "BLI_threads.h"
#include "BLI_time.h"
#include "BLI_vector.hh"
#include "BKE_cryptomatte.hh"
#include "BKE_material.hh"
#include "IMB_colormanagement.hh"
#include "GPU_capabilities.hh"
#include "GPU_context.hh"
#include "GPU_material.hh"
#include "GPU_shader.hh"
#include "GPU_uniform_buffer.hh"
#include "GPU_vertex_format.hh"
#include "BLI_sys_types.h" /* for intptr_t support */
#include "BLI_vector.hh"
#include "gpu_codegen.hh"
#include "gpu_node_graph.hh"
#include "gpu_shader_create_info.hh"
#include "gpu_shader_dependency_private.hh"
#include <cstdarg>
#include <cstring>
#include <sstream>
#include <string>
using namespace blender;
using namespace blender::gpu::shader;
/**
* IMPORTANT: Never add external reference. The GPUMaterial used to create the GPUPass (and its
* GPUCodegenCreateInfo) can be free before actually compiling. This happens if there is an update
* before deferred compilation happens and the GPUPass gets picked up by another GPUMaterial
* (because of GPUPass reuse).
*/
struct GPUCodegenCreateInfo : ShaderCreateInfo {
struct NameBuffer {
using NameEntry = std::array<char, 32>;
/** Duplicate attribute names to avoid reference the GPUNodeGraph directly. */
char attr_names[16][GPU_MAX_SAFE_ATTR_NAME + 1];
char var_names[16][8];
blender::Vector<std::unique_ptr<NameEntry>, 16> sampler_names;
/* Returns the appended name memory location */
const char *append_sampler_name(const char name[32])
{
auto index = sampler_names.size();
sampler_names.append(std::make_unique<NameEntry>());
char *name_buffer = sampler_names[index]->data();
memcpy(name_buffer, name, 32);
return name_buffer;
}
};
/** Optional generated interface. */
StageInterfaceInfo *interface_generated = nullptr;
/** Optional name buffer containing names referenced by StringRefNull. */
NameBuffer name_buffer;
GPUCodegenCreateInfo(const char *name) : ShaderCreateInfo(name){};
~GPUCodegenCreateInfo()
{
delete interface_generated;
};
};
struct GPUPass {
GPUPass *next = nullptr;
GPUShader *shader = nullptr;
GPUCodegenCreateInfo *create_info = nullptr;
/** Orphaned GPUPasses gets freed by the garbage collector. */
uint refcount = 0;
/** The last time the refcount was greater than 0. */
int gc_timestamp = 0;
/** The engine type this pass is compiled for. */
eGPUMaterialEngine engine = GPU_MAT_EEVEE_LEGACY;
/** Identity hash generated from all GLSL code. */
uint32_t hash = 0;
/** Did we already tried to compile the attached GPUShader. */
bool compiled = false;
/** If this pass is already being_compiled (A GPUPass can be shared by multiple GPUMaterials). */
bool compilation_requested = false;
/** Hint that an optimized variant of this pass should be created based on a complexity heuristic
* during pass code generation. */
bool should_optimize = false;
/** Whether pass is in the GPUPass cache. */
bool cached = false;
/** Protects pass shader from being created from multiple threads at the same time. */
ThreadMutex shader_creation_mutex = {};
BatchHandle async_compilation_handle = {};
};
/* -------------------------------------------------------------------- */
/** \name GPUPass Cache
*
* Internal shader cache: This prevent the shader recompilation / stall when
* using undo/redo AND also allows for GPUPass reuse if the Shader code is the
* same for 2 different Materials. Unused GPUPasses are free by Garbage collection.
* \{ */
/* Only use one linklist that contains the GPUPasses grouped by hash. */
static GPUPass *pass_cache = nullptr;
static SpinLock pass_cache_spin;
/* Search by hash only. Return first pass with the same hash.
* There is hash collision if (pass->next && pass->next->hash == hash) */
static GPUPass *gpu_pass_cache_lookup(eGPUMaterialEngine engine, uint32_t hash)
{
BLI_spin_lock(&pass_cache_spin);
/* Could be optimized with a Lookup table. */
for (GPUPass *pass = pass_cache; pass; pass = pass->next) {
if (pass->hash == hash && pass->engine == engine) {
BLI_spin_unlock(&pass_cache_spin);
return pass;
}
}
BLI_spin_unlock(&pass_cache_spin);
return nullptr;
}
static void gpu_pass_cache_insert_after(GPUPass *node, GPUPass *pass)
{
BLI_spin_lock(&pass_cache_spin);
pass->cached = true;
if (node != nullptr) {
/* Add after the first pass having the same hash. */
pass->next = node->next;
node->next = pass;
}
else {
/* No other pass have same hash, just prepend to the list. */
BLI_LINKS_PREPEND(pass_cache, pass);
}
BLI_spin_unlock(&pass_cache_spin);
}
/* Check all possible passes with the same hash. */
static GPUPass *gpu_pass_cache_resolve_collision(GPUPass *pass,
GPUShaderCreateInfo *info,
uint32_t hash)
{
eGPUMaterialEngine engine = pass->engine;
BLI_spin_lock(&pass_cache_spin);
for (; pass && (pass->hash == hash); pass = pass->next) {
if (*reinterpret_cast<ShaderCreateInfo *>(info) ==
*reinterpret_cast<ShaderCreateInfo *>(pass->create_info) &&
pass->engine == engine)
{
BLI_spin_unlock(&pass_cache_spin);
return pass;
}
}
BLI_spin_unlock(&pass_cache_spin);
return nullptr;
}
static bool gpu_pass_is_valid(const GPUPass *pass)
{
/* Shader is not null if compilation is successful. */
return (pass->compiled == false || pass->shader != nullptr);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Type > string conversion
* \{ */
@@ -234,12 +80,12 @@ static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output)
}
/* Print data constructor (i.e: vec2(1.0f, 1.0f)). */
static std::ostream &operator<<(std::ostream &stream, const blender::Span<float> &span)
static std::ostream &operator<<(std::ostream &stream, const Span<float> &span)
{
stream << (eGPUType)span.size() << "(";
/* Use uint representation to allow exact same bit pattern even if NaN. This is
* because we can pass UINTs as floats for constants. */
const blender::Span<uint32_t> uint_span = span.cast<uint32_t>();
const Span<uint32_t> uint_span = span.cast<uint32_t>();
for (const uint32_t &element : uint_span) {
char formatted_float[32];
SNPRINTF(formatted_float, "uintBitsToFloat(%uu)", element);
@@ -257,84 +103,57 @@ struct GPUConstant : public GPUInput {};
static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input)
{
stream << blender::Span<float>(input->vec, input->type);
stream << Span<float>(input->vec, input->type);
return stream;
}
namespace blender::gpu::shader {
/* Needed to use the << operators from nested namespaces. :(
* https://stackoverflow.com/questions/5195512/namespaces-and-operator-resolution */
using ::operator<<;
} // namespace blender::gpu::shader
/** \} */
/* -------------------------------------------------------------------- */
/** \name GLSL code generation
* \{ */
class GPUCodegen {
public:
GPUMaterial &mat;
GPUNodeGraph &graph;
GPUCodegenOutput output = {};
GPUCodegenCreateInfo *create_info = nullptr;
const char *GPUCodegenCreateInfo::NameBuffer::append_sampler_name(const char name[32])
{
auto index = sampler_names.size();
sampler_names.append(std::make_unique<NameEntry>());
char *name_buffer = sampler_names[index]->data();
memcpy(name_buffer, name, 32);
return name_buffer;
}
private:
uint32_t hash_ = 0;
BLI_HashMurmur2A hm2a_;
ListBase ubo_inputs_ = {nullptr, nullptr};
GPUInput *cryptomatte_input_ = nullptr;
GPUCodegen::GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_, const char *debug_name)
: mat(*mat_), graph(*graph_)
{
BLI_hash_mm2a_init(&hm2a_, GPU_material_uuid_get(&mat));
BLI_hash_mm2a_add_int(&hm2a_, GPU_material_flag(&mat));
create_info = MEM_new<GPUCodegenCreateInfo>(__func__, debug_name);
output.create_info = reinterpret_cast<GPUShaderCreateInfo *>(
static_cast<ShaderCreateInfo *>(create_info));
}
/** Cache parameters for complexity heuristic. */
uint nodes_total_ = 0;
uint textures_total_ = 0;
uint uniforms_total_ = 0;
public:
GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_)
{
BLI_hash_mm2a_init(&hm2a_, GPU_material_uuid_get(&mat));
BLI_hash_mm2a_add_int(&hm2a_, GPU_material_flag(&mat));
create_info = new GPUCodegenCreateInfo("codegen");
output.create_info = reinterpret_cast<GPUShaderCreateInfo *>(
static_cast<ShaderCreateInfo *>(create_info));
}
~GPUCodegen()
{
MEM_SAFE_FREE(cryptomatte_input_);
delete create_info;
BLI_freelistN(&ubo_inputs_);
};
void generate_graphs();
void generate_cryptomatte();
void generate_uniform_buffer();
void generate_attribs();
void generate_resources();
void generate_library();
uint32_t hash_get() const
{
return hash_;
}
/* Heuristic determined during pass codegen for whether a
* more optimal variant of this material should be compiled. */
bool should_optimize_heuristic() const
{
/* If each of the maximal attributes are exceeded, we can optimize, but we should also ensure
* the baseline is met. */
bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) &&
(textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4);
return do_optimize;
}
private:
void set_unique_ids();
void node_serialize(std::stringstream &eval_ss, const GPUNode *node);
std::string graph_serialize(eGPUNodeTag tree_tag,
GPUNodeLink *output_link,
const char *output_default = nullptr);
std::string graph_serialize(eGPUNodeTag tree_tag);
GPUCodegen::~GPUCodegen()
{
MEM_SAFE_FREE(cryptomatte_input_);
MEM_delete(create_info);
BLI_freelistN(&ubo_inputs_);
};
bool GPUCodegen::should_optimize_heuristic() const
{
/* If each of the maximal attributes are exceeded, we can optimize, but we should also ensure
* the baseline is met. */
bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) &&
(textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4);
return do_optimize;
}
void GPUCodegen::generate_attribs()
{
if (BLI_listbase_is_empty(&graph.attributes)) {
@@ -344,7 +163,7 @@ void GPUCodegen::generate_attribs()
GPUCodegenCreateInfo &info = *create_info;
info.interface_generated = new StageInterfaceInfo("codegen_iface", "var_attrs");
info.interface_generated = MEM_new<StageInterfaceInfo>(__func__, "codegen_iface", "var_attrs");
StageInterfaceInfo &iface = *info.interface_generated;
info.vertex_out(iface);
@@ -360,8 +179,8 @@ void GPUCodegen::generate_attribs()
STRNCPY(info.name_buffer.attr_names[slot], attr->input_name);
SNPRINTF(info.name_buffer.var_names[slot], "v%d", attr->id);
blender::StringRefNull attr_name = info.name_buffer.attr_names[slot];
blender::StringRefNull var_name = info.name_buffer.var_names[slot];
StringRefNull attr_name = info.name_buffer.attr_names[slot];
StringRefNull var_name = info.name_buffer.var_names[slot];
eGPUType input_type, iface_type;
@@ -470,7 +289,7 @@ void GPUCodegen::generate_library()
GPUCodegenCreateInfo &info = *create_info;
void *value;
blender::Vector<std::string> source_files;
Vector<std::string> source_files;
/* Iterate over libraries. We need to keep this struct intact in case it is required for the
* optimization pass. The first pass just collects the keys from the GSET, given items in a GSET
@@ -543,7 +362,7 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node)
if (from == GPU_VEC4 && to == GPU_FLOAT) {
float coefficients[3];
IMB_colormanagement_get_luminance_coefficients(coefficients);
eval_ss << ", " << blender::Span<float>(coefficients, 3);
eval_ss << ", " << Span<float>(coefficients, 3);
}
eval_ss << ")";
@@ -628,8 +447,8 @@ void GPUCodegen::generate_cryptomatte()
float material_hash = 0.0f;
Material *material = GPU_material_get_material(&mat);
if (material) {
blender::bke::cryptomatte::CryptomatteHash hash(
material->id.name + 2, BLI_strnlen(material->id.name + 2, MAX_NAME - 2));
bke::cryptomatte::CryptomatteHash hash(material->id.name + 2,
BLI_strnlen(material->id.name + 2, MAX_NAME - 2));
material_hash = hash.float_encoded();
}
cryptomatte_input_->vec[0] = material_hash;
@@ -711,355 +530,3 @@ void GPUCodegen::generate_graphs()
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name GPUPass
* \{ */
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
eGPUMaterialEngine engine,
GPUCodegenCallbackFn finalize_source_cb,
void *thunk,
bool optimize_graph)
{
gpu_node_graph_prune_unused(graph);
/* If Optimize flag is passed in, we are generating an optimized
* variant of the GPUMaterial's GPUPass. */
if (optimize_graph) {
gpu_node_graph_optimize(graph);
}
/* Extract attributes before compiling so the generated VBOs are ready to accept the future
* shader. */
gpu_node_graph_finalize_uniform_attrs(graph);
GPUCodegen codegen(material, graph);
codegen.generate_graphs();
codegen.generate_cryptomatte();
GPUPass *pass_hash = nullptr;
if (!optimize_graph) {
/* The optimized version of the shader should not re-generate a UBO.
* The UBO will not be used for this variant. */
codegen.generate_uniform_buffer();
/** Cache lookup: Reuse shaders already compiled.
* NOTE: We only perform cache look-up for non-optimized shader
* graphs, as baked constant data among other optimizations will generate too many
* shader source permutations, with minimal re-usability. */
pass_hash = gpu_pass_cache_lookup(engine, codegen.hash_get());
/* FIXME(fclem): This is broken. Since we only check for the hash and not the full source
   * there is no way to detect a collision currently. Some advocated to only use a bigger hash. */
if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) {
if (!gpu_pass_is_valid(pass_hash)) {
/* Shader has already been created but failed to compile. */
return nullptr;
}
/* No collision, just return the pass. */
BLI_spin_lock(&pass_cache_spin);
pass_hash->refcount += 1;
BLI_spin_unlock(&pass_cache_spin);
return pass_hash;
}
}
/* Either the shader is not compiled or there is a hash collision...
* continue generating the shader strings. */
codegen.generate_attribs();
codegen.generate_resources();
codegen.generate_library();
/* Make engine add its own code and implement the generated functions. */
finalize_source_cb(thunk, material, &codegen.output);
GPUPass *pass = nullptr;
if (pass_hash) {
/* Cache lookup: Reuse shaders already compiled. */
pass = gpu_pass_cache_resolve_collision(
pass_hash, codegen.output.create_info, codegen.hash_get());
}
if (pass) {
/* Cache hit. Reuse the same GPUPass and GPUShader. */
if (!gpu_pass_is_valid(pass)) {
/* Shader has already been created but failed to compile. */
return nullptr;
}
BLI_spin_lock(&pass_cache_spin);
pass->refcount += 1;
BLI_spin_unlock(&pass_cache_spin);
}
else {
/* We still create a pass even if shader compilation
* fails to avoid trying to compile again and again. */
pass = MEM_new<GPUPass>("GPUPass");
pass->shader = nullptr;
pass->refcount = 1;
pass->create_info = codegen.create_info;
/* Finalize before adding the pass to the cache, to prevent race conditions. */
pass->create_info->finalize();
pass->engine = engine;
pass->hash = codegen.hash_get();
pass->compiled = false;
pass->compilation_requested = false;
pass->cached = false;
/* Only flag pass optimization hint if this is the first generated pass for a material.
* Optimized passes cannot be optimized further, even if the heuristic is still not
* favorable. */
pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic();
pass->async_compilation_handle = -1;
BLI_mutex_init(&pass->shader_creation_mutex);
codegen.create_info = nullptr;
/* Only insert non-optimized graphs into cache.
* Optimized graphs will continuously be recompiled with new unique source during material
     * editing, causing the cache to fill up quickly with materials offering minimal
* re-use. */
if (!optimize_graph) {
gpu_pass_cache_insert_after(pass_hash, pass);
}
}
return pass;
}
bool GPU_pass_should_optimize(GPUPass *pass)
{
/* Returns optimization heuristic prepared during
* initial codegen.
* NOTE: Optimization currently limited to Metal backend as repeated compilations required for
* material specialization cause impactful CPU stalls on OpenGL platforms. */
return (GPU_backend_get_type() == GPU_BACKEND_METAL) && pass->should_optimize;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Compilation
* \{ */
static int count_active_texture_sampler(GPUPass *pass, GPUShader *shader)
{
int num_samplers = 0;
for (const ShaderCreateInfo::Resource &res : pass->create_info->pass_resources_) {
if (res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
if (GPU_shader_get_uniform(shader, res.sampler.name.c_str()) != -1) {
num_samplers += 1;
}
}
}
return num_samplers;
}
static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader)
{
if (shader == nullptr) {
return false;
}
/* NOTE: The only drawback of this method is that it will count a sampler
* used in the fragment shader and only declared (but not used) in the vertex
   * shader as used by both. But this corner case does not occur in practice for now. */
int active_samplers_len = count_active_texture_sampler(pass, shader);
/* Validate against opengl limit. */
if ((active_samplers_len > GPU_max_textures_frag()) ||
(active_samplers_len > GPU_max_textures_vert()))
{
return false;
}
if (pass->create_info->geometry_source_.is_empty() == false) {
if (active_samplers_len > GPU_max_textures_geom()) {
return false;
}
}
return (active_samplers_len * 3 <= GPU_max_textures());
}
GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname)
{
if (!pass->compilation_requested) {
pass->compilation_requested = true;
pass->create_info->name_ = shname;
GPUShaderCreateInfo *info = reinterpret_cast<GPUShaderCreateInfo *>(
static_cast<ShaderCreateInfo *>(pass->create_info));
return info;
}
return nullptr;
}
bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader)
{
bool success = true;
if (!pass->compiled) {
    /* NOTE: Some drivers / GPUs allow more active samplers than the OpenGL limit.
* We need to make sure to count active samplers to avoid undefined behavior. */
if (!gpu_pass_shader_validate(pass, shader)) {
success = false;
if (shader != nullptr) {
fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
GPU_shader_free(shader);
shader = nullptr;
}
}
pass->shader = shader;
pass->compiled = true;
}
return success;
}
void GPU_pass_begin_async_compilation(GPUPass *pass, const char *shname)
{
BLI_mutex_lock(&pass->shader_creation_mutex);
if (pass->async_compilation_handle == -1) {
if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) {
pass->async_compilation_handle = GPU_shader_batch_create_from_infos({info});
}
else {
/* The pass has been already compiled synchronously. */
BLI_assert(pass->compiled);
pass->async_compilation_handle = 0;
}
}
BLI_mutex_unlock(&pass->shader_creation_mutex);
}
bool GPU_pass_async_compilation_try_finalize(GPUPass *pass)
{
BLI_mutex_lock(&pass->shader_creation_mutex);
BLI_assert(pass->async_compilation_handle != -1);
if (pass->async_compilation_handle) {
if (GPU_shader_batch_is_ready(pass->async_compilation_handle)) {
GPU_pass_finalize_compilation(
pass, GPU_shader_batch_finalize(pass->async_compilation_handle).first());
}
}
BLI_mutex_unlock(&pass->shader_creation_mutex);
return pass->async_compilation_handle == 0;
}
bool GPU_pass_compile(GPUPass *pass, const char *shname)
{
BLI_mutex_lock(&pass->shader_creation_mutex);
bool success = true;
if (pass->async_compilation_handle > 0) {
/* We're trying to compile this pass synchronously, but there's a pending asynchronous
* compilation already started. */
success = GPU_pass_finalize_compilation(
pass, GPU_shader_batch_finalize(pass->async_compilation_handle).first());
}
else if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) {
GPUShader *shader = GPU_shader_create_from_info(info);
success = GPU_pass_finalize_compilation(pass, shader);
}
BLI_mutex_unlock(&pass->shader_creation_mutex);
return success;
}
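/* A minimal usage sketch of the asynchronous path above, for illustration only (the function
 * name and polling loop are hypothetical; real callers poll from their update step): */
static GPUShader *example_async_compile_poll(GPUPass *pass)
{
  GPU_pass_begin_async_compilation(pass, "example_pass");
  while (!GPU_pass_async_compilation_try_finalize(pass)) {
    /* Compilation still running in the shader batch; do other work and poll again later. */
  }
  /* Finalized regardless of success; a null shader means compilation failed. */
  return GPU_pass_shader_get(pass);
}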
GPUShader *GPU_pass_shader_get(GPUPass *pass)
{
return pass->shader;
}
static void gpu_pass_free(GPUPass *pass)
{
BLI_assert(pass->refcount == 0);
BLI_mutex_end(&pass->shader_creation_mutex);
if (pass->shader) {
GPU_shader_free(pass->shader);
}
delete pass->create_info;
MEM_delete(pass);
}
void GPU_pass_acquire(GPUPass *pass)
{
BLI_spin_lock(&pass_cache_spin);
BLI_assert(pass->refcount > 0);
pass->refcount++;
BLI_spin_unlock(&pass_cache_spin);
}
void GPU_pass_release(GPUPass *pass)
{
BLI_spin_lock(&pass_cache_spin);
BLI_assert(pass->refcount > 0);
pass->refcount--;
/* Un-cached passes will not be filtered by garbage collection, so release here. */
if (pass->refcount == 0 && !pass->cached) {
gpu_pass_free(pass);
}
BLI_spin_unlock(&pass_cache_spin);
}
void GPU_pass_cache_garbage_collect()
{
const int shadercollectrate = 60; /* hardcoded for now. */
int ctime = int(BLI_time_now_seconds());
BLI_spin_lock(&pass_cache_spin);
GPUPass *next, **prev_pass = &pass_cache;
for (GPUPass *pass = pass_cache; pass; pass = next) {
next = pass->next;
if (pass->refcount > 0) {
pass->gc_timestamp = ctime;
}
else if (pass->gc_timestamp + shadercollectrate < ctime) {
/* Remove from list */
*prev_pass = next;
gpu_pass_free(pass);
continue;
}
prev_pass = &pass->next;
}
BLI_spin_unlock(&pass_cache_spin);
}
void GPU_pass_cache_init()
{
BLI_spin_init(&pass_cache_spin);
}
void GPU_pass_cache_free()
{
BLI_spin_lock(&pass_cache_spin);
while (pass_cache) {
GPUPass *next = pass_cache->next;
gpu_pass_free(pass_cache);
pass_cache = next;
}
BLI_spin_unlock(&pass_cache_spin);
BLI_spin_end(&pass_cache_spin);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Module
* \{ */
void gpu_codegen_init() {}
void gpu_codegen_exit()
{
BKE_material_defaults_free_gpu();
GPU_shader_free_builtin_shaders();
}
/** \} */

View File

@@ -10,39 +10,97 @@
#pragma once
#include "BLI_hash_mm2a.hh"
#include "BLI_listbase.h"
#include "BLI_vector.hh"
#include "GPU_material.hh"
#include "GPU_shader.hh"
#include "GPU_vertex_format.hh"
#include "gpu_node_graph.hh"
#include "gpu_shader_create_info.hh"
struct GPUNodeGraph;
#include <sstream>
#include <string>
struct GPUPass;
namespace blender::gpu::shader {
/* Pass */
struct GPUCodegenCreateInfo : ShaderCreateInfo {
struct NameBuffer {
using NameEntry = std::array<char, 32>;
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
eGPUMaterialEngine engine,
GPUCodegenCallbackFn finalize_source_cb,
void *thunk,
bool optimize_graph);
GPUShader *GPU_pass_shader_get(GPUPass *pass);
bool GPU_pass_compile(GPUPass *pass, const char *shname);
void GPU_pass_acquire(GPUPass *pass);
void GPU_pass_release(GPUPass *pass);
bool GPU_pass_should_optimize(GPUPass *pass);
  /** Duplicate attribute names to avoid referencing the GPUNodeGraph directly. */
char attr_names[16][GPU_MAX_SAFE_ATTR_NAME + 1];
char var_names[16][8];
Vector<std::unique_ptr<NameEntry>, 16> sampler_names;
/* Custom pass compilation. */
/* Returns the appended name memory location */
const char *append_sampler_name(const char name[32]);
};
GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname);
bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader);
/** Optional generated interface. */
StageInterfaceInfo *interface_generated = nullptr;
/** Optional name buffer containing names referenced by StringRefNull. */
NameBuffer name_buffer;
/** Copy of the GPUMaterial name, to prevent dangling pointers. */
std::string info_name_;
void GPU_pass_begin_async_compilation(GPUPass *pass, const char *shname);
/** NOTE: Unlike the non-async version, this one returns true when compilation has finalized,
 * regardless of whether it succeeded or not.
* To check for success, see if `GPU_pass_shader_get() != nullptr`. */
bool GPU_pass_async_compilation_try_finalize(GPUPass *pass);
GPUCodegenCreateInfo(const char *name) : ShaderCreateInfo(name), info_name_(name)
{
/* Base class is always initialized first, so we need to update the name_ pointer here. */
name_ = info_name_.c_str();
};
~GPUCodegenCreateInfo()
{
MEM_delete(interface_generated);
}
};
/* Module */
class GPUCodegen {
public:
GPUMaterial &mat;
GPUNodeGraph &graph;
GPUCodegenOutput output = {};
GPUCodegenCreateInfo *create_info = nullptr;
void gpu_codegen_init();
void gpu_codegen_exit();
private:
uint32_t hash_ = 0;
BLI_HashMurmur2A hm2a_;
ListBase ubo_inputs_ = {nullptr, nullptr};
GPUInput *cryptomatte_input_ = nullptr;
/** Cache parameters for complexity heuristic. */
uint nodes_total_ = 0;
uint textures_total_ = 0;
uint uniforms_total_ = 0;
public:
GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_, const char *debug_name);
~GPUCodegen();
void generate_graphs();
void generate_cryptomatte();
void generate_uniform_buffer();
void generate_attribs();
void generate_resources();
void generate_library();
uint32_t hash_get() const
{
return hash_;
}
/* Heuristic determined during pass codegen for whether a
* more optimal variant of this material should be compiled. */
bool should_optimize_heuristic() const;
private:
void set_unique_ids();
void node_serialize(std::stringstream &eval_ss, const GPUNode *node);
std::string graph_serialize(eGPUNodeTag tree_tag,
GPUNodeLink *output_link,
const char *output_default = nullptr);
std::string graph_serialize(eGPUNodeTag tree_tag);
};
} // namespace blender::gpu::shader

View File

@@ -27,6 +27,7 @@
#include "GPU_context.hh"
#include "GPU_batch.hh"
#include "GPU_pass.hh"
#include "gpu_backend.hh"
#include "gpu_context_private.hh"
#include "gpu_matrix_private.hh"
@@ -328,6 +329,8 @@ void GPU_render_step(bool force_resource_release)
backend->render_step(force_resource_release);
printf_begin(active_ctx);
}
GPU_pass_cache_update();
}
/** \} */

View File

@@ -6,10 +6,12 @@
* \ingroup gpu
*/
#include "GPU_init_exit.hh" /* interface */
#include "GPU_batch.hh"
#include "BKE_material.hh"
#include "GPU_batch.hh"
#include "GPU_init_exit.hh" /* interface */
#include "GPU_pass.hh"
#include "intern/gpu_codegen.hh"
#include "intern/gpu_private.hh"
#include "intern/gpu_shader_create_info_private.hh"
#include "intern/gpu_shader_dependency_private.hh"
@@ -34,7 +36,7 @@ void GPU_init()
gpu_shader_dependency_init();
gpu_shader_create_info_init();
gpu_codegen_init();
GPU_pass_cache_init();
gpu_batch_init();
}
@@ -43,7 +45,10 @@ void GPU_exit()
{
gpu_batch_exit();
gpu_codegen_exit();
GPU_pass_cache_free();
BKE_material_defaults_free_gpu();
GPU_shader_free_builtin_shaders();
gpu_backend_delete_resources();

View File

@@ -29,33 +29,24 @@
#include "NOD_shader.h"
#include "GPU_material.hh"
#include "GPU_pass.hh"
#include "GPU_shader.hh"
#include "GPU_texture.hh"
#include "GPU_uniform_buffer.hh"
#include "DRW_engine.hh"
#include "gpu_codegen.hh"
#include "gpu_node_graph.hh"
#include "atomic_ops.h"
static void gpu_material_ramp_texture_build(GPUMaterial *mat);
static void gpu_material_sky_texture_build(GPUMaterial *mat);
/* Structs */
#define MAX_COLOR_BAND 128
#define MAX_GPU_SKIES 8
/**
* Whether the optimized variant of the GPUPass should be created asynchronously.
* Usage of this depends on whether there are possible threading challenges of doing so.
* Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader
* compilation, though this option exists in case any potential scenarios for material graph
 * optimization cause a slowdown on the main thread.
*
* NOTE: The actual shader program for the optimized pass will always be compiled asynchronously,
* this flag controls whether shader node graph source serialization happens on the compilation
* worker thread as well. */
#define ASYNC_OPTIMIZED_PASS_CREATION 0
struct GPUColorBandBuilder {
float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4];
int current_layer;
@@ -68,82 +59,354 @@ struct GPUSkyBuilder {
struct GPUMaterial {
/* Contains #GPUShader and source code for deferred compilation.
* Can be shared between similar material (i.e: sharing same node-tree topology). */
GPUPass *pass;
* Can be shared between materials sharing same node-tree topology. */
GPUPass *pass = nullptr;
/* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance.
* This shader variant bakes dynamic uniform data as constant. This variant will not use
* the ubo, and instead bake constants directly into the shader source. */
GPUPass *optimized_pass;
/* Optimization status.
* We also use this status to determine whether this material should be considered for
* optimization. Only sufficiently complex shaders benefit from constant-folding optimizations.
* `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization.
* `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit
* performance to do so, based on the heuristic.
*/
eGPUMaterialOptimizationStatus optimization_status;
double creation_time;
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
struct DeferredOptimizePass {
GPUCodegenCallbackFn callback;
void *thunk;
} DeferredOptimizePass;
struct DeferredOptimizePass optimize_pass_info;
#endif
GPUPass *optimized_pass = nullptr;
/** UBOs for this material parameters. */
GPUUniformBuf *ubo;
/** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */
eGPUMaterialStatus status;
/** Some flags about the nodetree & the needed resources. */
eGPUMaterialFlag flag;
/** The engine type this material is compiled for. */
/* UBOs for this material parameters. */
GPUUniformBuf *ubo = nullptr;
/* Some flags about the nodetree & the needed resources. */
eGPUMaterialFlag flag = GPU_MATFLAG_UPDATED;
/* The engine type this material is compiled for. */
eGPUMaterialEngine engine;
/* Identify shader variations (shadow, probe, world background...) */
uint64_t uuid;
uint64_t uuid = 0;
/* Number of generated function. */
int generated_function_len;
/** Object type for attribute fetching. */
bool is_volume_shader;
int generated_function_len = 0;
/** DEPRECATED Currently only used for deferred compilation. */
Scene *scene;
/** Source material, might be null. */
Material *ma;
/** 1D Texture array containing all color bands. */
GPUTexture *coba_tex;
/** Builder for coba_tex. */
GPUColorBandBuilder *coba_builder;
/** 2D Texture array containing all sky textures. */
GPUTexture *sky_tex;
/** Builder for sky_tex. */
GPUSkyBuilder *sky_builder;
/* Source material, might be null. */
Material *source_material = nullptr;
/* 1D Texture array containing all color bands. */
GPUTexture *coba_tex = nullptr;
/* Builder for coba_tex. */
GPUColorBandBuilder *coba_builder = nullptr;
/* 2D Texture array containing all sky textures. */
GPUTexture *sky_tex = nullptr;
/* Builder for sky_tex. */
GPUSkyBuilder *sky_builder = nullptr;
/* Low level node graph(s). Also contains resources needed by the material. */
GPUNodeGraph graph;
GPUNodeGraph graph = {};
/** Default material reference used for PSO cache warming. Default materials may perform
* different operations, but the permutation will frequently share the same input PSO
* descriptors. This enables asynchronous PSO compilation as part of the deferred compilation
   * pass, reducing runtime stuttering and improving responsiveness while compiling materials. */
GPUMaterial *default_mat;
bool has_surface_output = false;
bool has_volume_output = false;
bool has_displacement_output = false;
/** DEPRECATED: To remove. */
bool has_surface_output;
bool has_volume_output;
bool has_displacement_output;
std::string name;
uint32_t refcount;
GPUMaterial(eGPUMaterialEngine engine) : engine(engine)
{
graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
};
bool do_batch_compilation;
~GPUMaterial()
{
gpu_node_graph_free(&graph);
#ifndef NDEBUG
char name[64];
#else
char name[16];
#endif
if (optimized_pass != nullptr) {
GPU_pass_release(optimized_pass);
}
if (pass != nullptr) {
GPU_pass_release(pass);
}
if (ubo != nullptr) {
GPU_uniformbuf_free(ubo);
}
if (coba_builder != nullptr) {
MEM_freeN(coba_builder);
}
if (coba_tex != nullptr) {
GPU_texture_free(coba_tex);
}
if (sky_tex != nullptr) {
GPU_texture_free(sky_tex);
}
}
};
/* Functions */
/* Public API */
GPUMaterial *GPU_material_from_nodetree(Material *ma,
bNodeTree *ntree,
ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool deferred_compilation,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb)
{
/* Search if this material is not already compiled. */
LISTBASE_FOREACH (LinkData *, link, gpumaterials) {
GPUMaterial *mat = (GPUMaterial *)link->data;
if (mat->uuid == shader_uuid && mat->engine == engine) {
if (!deferred_compilation) {
GPU_pass_ensure_its_ready(mat->pass);
}
return mat;
}
}
GPUMaterial *mat = MEM_new<GPUMaterial>(__func__, engine);
mat->source_material = ma;
mat->uuid = shader_uuid;
mat->name = name;
/* Localize tree to create links for reroute and mute. */
bNodeTree *localtree = blender::bke::node_tree_localize(ntree, nullptr);
ntreeGPUMaterialNodes(localtree, mat);
gpu_material_ramp_texture_build(mat);
gpu_material_sky_texture_build(mat);
/* Use default material pass when possible. */
if (GPUPass *default_pass = pass_replacement_cb ? pass_replacement_cb(thunk, mat) : nullptr) {
mat->pass = default_pass;
GPU_pass_acquire(mat->pass);
/** WORKAROUND:
* The node tree code is never executed in default replaced passes,
* but the GPU validation will still complain if the node tree UBO is not bound.
* So we create a dummy UBO with (at least) the size of the default material one (192 bytes).
* We allocate 256 bytes to leave some room for future changes. */
mat->ubo = GPU_uniformbuf_create_ex(256, nullptr, "Dummy UBO");
}
else {
/* Create source code and search pass cache for an already compiled version. */
mat->pass = GPU_generate_pass(
mat, &mat->graph, mat->name.c_str(), engine, deferred_compilation, callback, thunk, false);
}
/* Determine whether we should generate an optimized variant of the graph.
* Heuristic is based on complexity of default material pass and shader node graph. */
if (GPU_pass_should_optimize(mat->pass)) {
mat->optimized_pass = GPU_generate_pass(
mat, &mat->graph, mat->name.c_str(), engine, true, callback, thunk, true);
}
gpu_node_graph_free_nodes(&mat->graph);
/* Only free after GPU_pass_shader_get where GPUUniformBuf read data from the local tree. */
blender::bke::node_tree_free_local_tree(localtree);
BLI_assert(!localtree->id.py_instance); /* Or call #BKE_libblock_free_data_py. */
MEM_freeN(localtree);
/* Note that even if building the shader fails in some way, we want to keep
* it to avoid trying to compile again and again, and simply do not use
* the actual shader on drawing. */
LinkData *link = MEM_callocN<LinkData>("GPUMaterialLink");
link->data = mat;
BLI_addtail(gpumaterials, link);
return mat;
}
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk)
{
/* Allocate a new material and its material graph. */
GPUMaterial *material = MEM_new<GPUMaterial>(__func__, engine);
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
construct_function_cb(thunk, material);
/* Create and initialize the texture storing color bands used by Ramp and Curve nodes. */
gpu_material_ramp_texture_build(material);
/* Lookup an existing pass in the cache or generate a new one. */
material->pass = GPU_generate_pass(material,
&material->graph,
__func__,
engine,
false,
generate_code_function_cb,
thunk,
false);
/* Determine whether we should generate an optimized variant of the graph.
* Heuristic is based on complexity of default material pass and shader node graph. */
if (GPU_pass_should_optimize(material->pass)) {
material->optimized_pass = GPU_generate_pass(material,
&material->graph,
__func__,
engine,
true,
generate_code_function_cb,
thunk,
true);
}
gpu_node_graph_free_nodes(&material->graph);
return material;
}
void GPU_material_free_single(GPUMaterial *material)
{
MEM_delete(material);
}
void GPU_material_free(ListBase *gpumaterial)
{
LISTBASE_FOREACH (LinkData *, link, gpumaterial) {
GPUMaterial *material = static_cast<GPUMaterial *>(link->data);
GPU_material_free_single(material);
}
BLI_freelistN(gpumaterial);
}
void GPU_materials_free(Main *bmain)
{
LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
GPU_material_free(&ma->gpumaterial);
}
LISTBASE_FOREACH (World *, wo, &bmain->worlds) {
GPU_material_free(&wo->gpumaterial);
}
BKE_material_defaults_free_gpu();
}
const char *GPU_material_get_name(GPUMaterial *material)
{
return material->name.c_str();
}
uint64_t GPU_material_uuid_get(GPUMaterial *mat)
{
return mat->uuid;
}
Material *GPU_material_get_material(GPUMaterial *material)
{
return material->source_material;
}
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
/* If an optimized pass variant is available, and optimization is
* flagged as complete, we use this one instead. */
return GPU_material_optimization_status(material) == GPU_MAT_OPTIMIZATION_SUCCESS ?
material->optimized_pass :
material->pass;
}
GPUShader *GPU_material_get_shader(GPUMaterial *material)
{
return GPU_pass_shader_get(GPU_material_get_pass(material));
}
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat)
{
switch (GPU_pass_status(mat->pass)) {
case GPU_PASS_SUCCESS:
return GPU_MAT_SUCCESS;
case GPU_PASS_QUEUED:
return GPU_MAT_QUEUED;
default:
return GPU_MAT_FAILED;
}
}
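/* For illustration, a sketch of how a caller can consume the simplified status tracking
 * (only the GPU_material_* calls are real; the function and fallback are hypothetical): */
static GPUShader *example_resolve_material_shader(GPUMaterial *mat, GPUShader *fallback)
{
  if (GPU_material_status(mat) == GPU_MAT_SUCCESS) {
    return GPU_material_get_shader(mat);
  }
  /* Still queued (or failed): keep drawing with a fallback shader. */
  return fallback;
}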
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
{
if (!GPU_pass_should_optimize(mat->pass)) {
return GPU_MAT_OPTIMIZATION_SKIP;
}
switch (GPU_pass_status(mat->optimized_pass)) {
case GPU_PASS_SUCCESS:
return GPU_MAT_OPTIMIZATION_SUCCESS;
case GPU_PASS_QUEUED:
return GPU_MAT_OPTIMIZATION_QUEUED;
default:
BLI_assert_unreachable();
return GPU_MAT_OPTIMIZATION_SKIP;
}
}
uint64_t GPU_material_compilation_timestamp(GPUMaterial *mat)
{
return GPU_pass_compilation_timestamp(mat->pass);
}
bool GPU_material_has_surface_output(GPUMaterial *mat)
{
return mat->has_surface_output;
}
bool GPU_material_has_volume_output(GPUMaterial *mat)
{
return mat->has_volume_output;
}
bool GPU_material_has_displacement_output(GPUMaterial *mat)
{
return mat->has_displacement_output;
}
bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag)
{
return (mat->flag & flag) != 0;
}
eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat)
{
return mat->flag;
}
void GPU_material_flag_set(GPUMaterial *mat, eGPUMaterialFlag flag)
{
if ((flag & GPU_MATFLAG_GLOSSY) && (mat->flag & GPU_MATFLAG_GLOSSY)) {
/* Tag material using multiple glossy BSDF as using clear coat. */
mat->flag |= GPU_MATFLAG_COAT;
}
mat->flag |= flag;
}
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs)
{
material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name.c_str());
}
GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material)
{
return material->ubo;
}
ListBase GPU_material_attributes(const GPUMaterial *material)
{
return material->graph.attributes;
}
ListBase GPU_material_textures(GPUMaterial *material)
{
return material->graph.textures;
}
const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material)
{
const GPUUniformAttrList *attrs = &material->graph.uniform_attrs;
return attrs->count > 0 ? attrs : nullptr;
}
const ListBase *GPU_material_layer_attributes(const GPUMaterial *material)
{
const ListBase *attrs = &material->graph.layer_attrs;
return !BLI_listbase_is_empty(attrs) ? attrs : nullptr;
}
GPUNodeGraph *gpu_material_node_graph(GPUMaterial *material)
{
return &material->graph;
}
/* Resources */
GPUTexture **gpu_material_sky_texture_layer_set(
GPUMaterial *mat, int width, int height, const float *pixels, float *row)
@@ -243,119 +506,7 @@ static void gpu_material_sky_texture_build(GPUMaterial *mat)
mat->sky_builder = nullptr;
}
void GPU_material_free_single(GPUMaterial *material)
{
bool do_free = atomic_sub_and_fetch_uint32(&material->refcount, 1) == 0;
if (!do_free) {
return;
}
gpu_node_graph_free(&material->graph);
if (material->optimized_pass != nullptr) {
GPU_pass_release(material->optimized_pass);
}
if (material->pass != nullptr) {
GPU_pass_release(material->pass);
}
if (material->ubo != nullptr) {
GPU_uniformbuf_free(material->ubo);
}
if (material->coba_builder != nullptr) {
MEM_freeN(material->coba_builder);
}
if (material->coba_tex != nullptr) {
GPU_texture_free(material->coba_tex);
}
if (material->sky_tex != nullptr) {
GPU_texture_free(material->sky_tex);
}
MEM_freeN(material);
}
void GPU_material_free(ListBase *gpumaterial)
{
LISTBASE_FOREACH (LinkData *, link, gpumaterial) {
GPUMaterial *material = static_cast<GPUMaterial *>(link->data);
DRW_deferred_shader_remove(material);
GPU_material_free_single(material);
}
BLI_freelistN(gpumaterial);
}
Scene *GPU_material_scene(GPUMaterial *material)
{
return material->scene;
}
GPUPass *GPU_material_get_pass(GPUMaterial *material)
{
/* If an optimized pass variant is available, and optimization is
* flagged as complete, we use this one instead. */
return ((GPU_material_optimization_status(material) == GPU_MAT_OPTIMIZATION_SUCCESS) &&
material->optimized_pass) ?
material->optimized_pass :
material->pass;
}
GPUShader *GPU_material_get_shader(GPUMaterial *material)
{
/* If an optimized material shader variant is available, and optimization is
* flagged as complete, we use this one instead. */
GPUShader *shader = ((GPU_material_optimization_status(material) ==
GPU_MAT_OPTIMIZATION_SUCCESS) &&
material->optimized_pass) ?
GPU_pass_shader_get(material->optimized_pass) :
nullptr;
return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : nullptr);
}
GPUShader *GPU_material_get_shader_base(GPUMaterial *material)
{
return (material->pass) ? GPU_pass_shader_get(material->pass) : nullptr;
}
const char *GPU_material_get_name(GPUMaterial *material)
{
return material->name;
}
Material *GPU_material_get_material(GPUMaterial *material)
{
return material->ma;
}
GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material)
{
return material->ubo;
}
void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs)
{
material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name);
}
ListBase GPU_material_attributes(const GPUMaterial *material)
{
return material->graph.attributes;
}
ListBase GPU_material_textures(GPUMaterial *material)
{
return material->graph.textures;
}
const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material)
{
const GPUUniformAttrList *attrs = &material->graph.uniform_attrs;
return attrs->count > 0 ? attrs : nullptr;
}
const ListBase *GPU_material_layer_attributes(const GPUMaterial *material)
{
const ListBase *attrs = &material->graph.layer_attrs;
return !BLI_listbase_is_empty(attrs) ? attrs : nullptr;
}
/* Code generation */
void GPU_material_output_surface(GPUMaterial *material, GPUNodeLink *link)
{
@@ -430,476 +581,3 @@ char *GPU_material_split_sub_function(GPUMaterial *material,
return func_link->name;
}
GPUNodeGraph *gpu_material_node_graph(GPUMaterial *material)
{
return &material->graph;
}
eGPUMaterialStatus GPU_material_status(GPUMaterial *mat)
{
return mat->status;
}
void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status)
{
mat->status = status;
}
eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat)
{
return mat->optimization_status;
}
void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status)
{
mat->optimization_status = status;
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
/* Reset creation timer to delay optimization pass. */
mat->creation_time = BLI_time_now_seconds();
}
}
bool GPU_material_optimization_ready(GPUMaterial *mat)
{
/* Timer threshold before optimizations will be queued.
* When materials are frequently being modified, optimization
* can incur CPU overhead from excessive compilation.
*
* As the optimization is entirely asynchronous, it is still beneficial
* to do this quickly to avoid build-up and improve runtime performance.
* The threshold just prevents compilations being queued frame after frame. */
const double optimization_time_threshold_s = 1.2;
return ((BLI_time_now_seconds() - mat->creation_time) >= optimization_time_threshold_s);
}
void GPU_material_set_default(GPUMaterial *material, GPUMaterial *default_material)
{
if (material != default_material) {
material->default_mat = default_material;
}
}
/* Code generation */
bool GPU_material_has_surface_output(GPUMaterial *mat)
{
return mat->has_surface_output;
}
bool GPU_material_has_volume_output(GPUMaterial *mat)
{
return mat->has_volume_output;
}
bool GPU_material_has_displacement_output(GPUMaterial *mat)
{
return mat->has_displacement_output;
}
void GPU_material_flag_set(GPUMaterial *mat, eGPUMaterialFlag flag)
{
if ((flag & GPU_MATFLAG_GLOSSY) && (mat->flag & GPU_MATFLAG_GLOSSY)) {
/* Tag material using multiple glossy BSDF as using clear coat. */
mat->flag |= GPU_MATFLAG_COAT;
}
mat->flag |= flag;
}
bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag)
{
return (mat->flag & flag) != 0;
}
eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat)
{
return mat->flag;
}
bool GPU_material_recalc_flag_get(GPUMaterial *mat)
{
/* NOTE: Consumes the flags. */
bool updated = (mat->flag & GPU_MATFLAG_UPDATED) != 0;
mat->flag &= ~GPU_MATFLAG_UPDATED;
return updated;
}
uint64_t GPU_material_uuid_get(GPUMaterial *mat)
{
return mat->uuid;
}
GPUMaterial *GPU_material_from_nodetree(Scene *scene,
Material *ma,
bNodeTree *ntree,
ListBase *gpumaterials,
const char *name,
eGPUMaterialEngine engine,
uint64_t shader_uuid,
bool is_volume_shader,
bool is_lookdev,
GPUCodegenCallbackFn callback,
void *thunk,
GPUMaterialPassReplacementCallbackFn pass_replacement_cb)
{
/* Search if this material is not already compiled. */
LISTBASE_FOREACH (LinkData *, link, gpumaterials) {
GPUMaterial *mat = (GPUMaterial *)link->data;
if (mat->uuid == shader_uuid && mat->engine == engine) {
return mat;
}
}
GPUMaterial *mat = MEM_callocN<GPUMaterial>("GPUMaterial");
mat->ma = ma;
mat->scene = scene;
mat->engine = engine;
mat->uuid = shader_uuid;
mat->flag = GPU_MATFLAG_UPDATED;
mat->status = GPU_MAT_CREATED;
mat->default_mat = nullptr;
mat->is_volume_shader = is_volume_shader;
mat->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
mat->refcount = 1;
STRNCPY(mat->name, name);
if (is_lookdev) {
mat->flag |= GPU_MATFLAG_LOOKDEV_HACK;
}
/* Localize tree to create links for reroute and mute. */
bNodeTree *localtree = blender::bke::node_tree_localize(ntree, nullptr);
ntreeGPUMaterialNodes(localtree, mat);
gpu_material_ramp_texture_build(mat);
gpu_material_sky_texture_build(mat);
/* Use default material pass when possible. */
if (GPUPass *default_pass = pass_replacement_cb ? pass_replacement_cb(thunk, mat) : nullptr) {
mat->pass = default_pass;
GPU_pass_acquire(mat->pass);
/** WORKAROUND:
* The node tree code is never executed in default replaced passes,
* but the GPU validation will still complain if the node tree UBO is not bound.
* So we create a dummy UBO with (at least) the size of the default material one (192 bytes).
* We allocate 256 bytes to leave some room for future changes. */
mat->ubo = GPU_uniformbuf_create_ex(256, nullptr, "Dummy UBO");
}
else {
/* Create source code and search pass cache for an already compiled version. */
mat->pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, false);
}
if (mat->pass == nullptr) {
/* We had a cache hit and the shader has already failed to compile. */
mat->status = GPU_MAT_FAILED;
gpu_node_graph_free(&mat->graph);
}
else {
/* Determine whether we should generate an optimized variant of the graph.
* Heuristic is based on complexity of default material pass and shader node graph. */
if (GPU_pass_should_optimize(mat->pass)) {
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
}
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != nullptr) {
/* We had a cache hit and the shader is already compiled. */
mat->status = GPU_MAT_SUCCESS;
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
gpu_node_graph_free_nodes(&mat->graph);
}
}
/* Generate optimized pass. */
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) {
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
mat->optimized_pass = nullptr;
mat->optimize_pass_info.callback = callback;
mat->optimize_pass_info.thunk = thunk;
#else
mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, true);
if (mat->optimized_pass == nullptr) {
/* Failed to create optimized pass. */
gpu_node_graph_free_nodes(&mat->graph);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
}
else {
GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass);
if (optimized_sh != nullptr) {
/* Optimized shader already available. */
gpu_node_graph_free_nodes(&mat->graph);
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
}
}
#endif
}
}
/* Only free after GPU_pass_shader_get where GPUUniformBuf read data from the local tree. */
blender::bke::node_tree_free_local_tree(localtree);
BLI_assert(!localtree->id.py_instance); /* Or call #BKE_libblock_free_data_py. */
MEM_freeN(localtree);
/* Note that even if building the shader fails in some way, we still keep
* it to avoid trying to compile again and again, and simply do not use
* the actual shader on drawing. */
LinkData *link = MEM_callocN<LinkData>("GPUMaterialLink");
link->data = mat;
BLI_addtail(gpumaterials, link);
return mat;
}
void GPU_material_acquire(GPUMaterial *mat)
{
atomic_add_and_fetch_uint32(&mat->refcount, 1);
}
void GPU_material_release(GPUMaterial *mat)
{
GPU_material_free_single(mat);
}
static void gpu_material_finalize(GPUMaterial *mat, bool success)
{
mat->flag |= GPU_MATFLAG_UPDATED;
if (success) {
GPUShader *sh = GPU_pass_shader_get(mat->pass);
if (sh != nullptr) {
/** Perform asynchronous Render Pipeline State Object (PSO) compilation.
*
* Warm PSO cache within asynchronous compilation thread using default material as source.
* GPU_shader_warm_cache(..) performs the API-specific PSO compilation using the assigned
* parent shader's cached PSO descriptors as an input.
*
* This is only applied if the given material has a specified default reference
* material available, and the default material is already compiled.
*
* As PSOs do not always match for default shaders, we limit warming for PSO
* configurations to ensure compile time remains fast, as these first
* entries will be the most commonly used PSOs. As not all PSOs are necessarily
* required immediately, this limit should remain low (1-3 at most). */
if (!ELEM(mat->default_mat, nullptr, mat)) {
if (mat->default_mat->pass != nullptr) {
GPUShader *parent_sh = GPU_pass_shader_get(mat->default_mat->pass);
if (parent_sh) {
/* Skip warming if cached pass is identical to the default material. */
if (mat->default_mat->pass != mat->pass && parent_sh != sh) {
GPU_shader_set_parent(sh, parent_sh);
GPU_shader_warm_cache(sh, 1);
}
}
}
}
/* Flag success. */
mat->status = GPU_MAT_SUCCESS;
if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
/* Only free node graph nodes if not required by secondary optimization pass. */
gpu_node_graph_free_nodes(&mat->graph);
}
}
else {
mat->status = GPU_MAT_FAILED;
}
}
else {
mat->status = GPU_MAT_FAILED;
GPU_pass_release(mat->pass);
mat->pass = nullptr;
gpu_node_graph_free(&mat->graph);
}
}
void GPU_material_compile(GPUMaterial *mat)
{
bool success;
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
success = GPU_pass_compile(mat->pass, mat->name);
#else
success = GPU_pass_compile(mat->pass, __func__);
#endif
gpu_material_finalize(mat, success);
}
void GPU_material_async_compile(GPUMaterial *mat)
{
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
BLI_assert(mat->pass);
#ifndef NDEBUG
const char *name = mat->name;
#else
const char *name = __func__;
#endif
GPU_pass_begin_async_compilation(mat->pass, name);
}
bool GPU_material_async_try_finalize(GPUMaterial *mat)
{
BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED));
if (GPU_pass_async_compilation_try_finalize(mat->pass)) {
gpu_material_finalize(mat, GPU_pass_shader_get(mat->pass) != nullptr);
return true;
}
return false;
}
void GPU_material_optimize(GPUMaterial *mat)
{
/* If shader is flagged for skipping optimization or has already been successfully
* optimized, skip. */
if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) {
return;
}
/* If original shader has not been fully compiled, we are not
* ready to perform optimization. */
if (mat->status != GPU_MAT_SUCCESS) {
/* Reset optimization status. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY);
return;
}
#if ASYNC_OPTIMIZED_PASS_CREATION == 1
/* If the optimized pass is not valid, first generate optimized pass.
* NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when
* used with "thunk". So far, this appears to work, and deferring optimized pass creation is more
* optimal, as these do not benefit from caching, due to baked constants. However, this could
 * be a cause for concern in certain cases. */
if (!mat->optimized_pass) {
mat->optimized_pass = GPU_generate_pass(mat,
&mat->graph,
mat->engine,
mat->optimize_pass_info.callback,
mat->optimize_pass_info.thunk,
true);
BLI_assert(mat->optimized_pass);
}
#else
if (!mat->optimized_pass) {
/* Optimized pass has not been created, skip future optimization attempts. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
return;
}
#endif
bool success;
/* NOTE: The shader may have already been compiled here since we are
* sharing GPUShader across GPUMaterials. In this case it's a no-op. */
#ifndef NDEBUG
success = GPU_pass_compile(mat->optimized_pass, mat->name);
#else
success = GPU_pass_compile(mat->optimized_pass, __func__);
#endif
if (success) {
GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass);
if (sh != nullptr) {
/** Perform asynchronous Render Pipeline State Object (PSO) compilation.
*
* Warm PSO cache within asynchronous compilation thread for optimized materials.
* This setup assigns the original unoptimized shader as a "parent" shader
* for the optimized version. This then allows the associated GPU backend to
* compile PSOs within this asynchronous pass, using the identical PSO descriptors of the
* parent shader.
*
* This eliminates all run-time stuttering associated with material optimization and ensures
     * realtime material editing and animation remain seamless, while retaining optimal realtime
* performance. */
GPUShader *parent_sh = GPU_pass_shader_get(mat->pass);
if (parent_sh) {
GPU_shader_set_parent(sh, parent_sh);
GPU_shader_warm_cache(sh, -1);
}
/* Mark as complete. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS);
}
else {
/* Optimized pass failed to compile. Disable any future optimization attempts. */
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
}
}
else {
/* Optimization pass generation failed. Disable future attempts to optimize. */
GPU_pass_release(mat->optimized_pass);
mat->optimized_pass = nullptr;
GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP);
}
/* Release node graph as no longer needed. */
gpu_node_graph_free_nodes(&mat->graph);
}
void GPU_materials_free(Main *bmain)
{
LISTBASE_FOREACH (Material *, ma, &bmain->materials) {
GPU_material_free(&ma->gpumaterial);
}
LISTBASE_FOREACH (World *, wo, &bmain->worlds) {
GPU_material_free(&wo->gpumaterial);
}
BKE_material_defaults_free_gpu();
}
GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine,
ConstructGPUMaterialFn construct_function_cb,
GPUCodegenCallbackFn generate_code_function_cb,
void *thunk)
{
/* Allocate a new material and its material graph, and initialize its reference count. */
GPUMaterial *material = MEM_callocN<GPUMaterial>("GPUMaterial");
material->graph.used_libraries = BLI_gset_new(
BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries");
material->refcount = 1;
material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP;
material->optimized_pass = nullptr;
material->default_mat = nullptr;
material->engine = engine;
/* Construct the material graph by adding and linking the necessary GPU material nodes. */
construct_function_cb(thunk, material);
/* Create and initialize the texture storing color bands used by Ramp and Curve nodes. */
gpu_material_ramp_texture_build(material);
/* Lookup an existing pass in the cache or generate a new one. */
material->pass = GPU_generate_pass(
material, &material->graph, material->engine, generate_code_function_cb, thunk, false);
material->optimized_pass = nullptr;
/* The pass already exists in the pass cache but its shader already failed to compile. */
if (material->pass == nullptr) {
material->status = GPU_MAT_FAILED;
gpu_node_graph_free(&material->graph);
return material;
}
/* The pass already exists in the pass cache and its shader is already compiled. */
GPUShader *shader = GPU_pass_shader_get(material->pass);
if (shader != nullptr) {
material->status = GPU_MAT_SUCCESS;
if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) {
/* Only free node graph if not required by secondary optimization pass. */
gpu_node_graph_free_nodes(&material->graph);
}
return material;
}
/* The material was created successfully but still needs to be compiled. */
material->status = GPU_MAT_CREATED;
return material;
}

View File

@@ -0,0 +1,417 @@
/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
/** \file
* \ingroup gpu
*
* Convert material node-trees to GLSL.
*/
#include "MEM_guardedalloc.h"
#include "BLI_map.hh"
#include "BLI_span.hh"
#include "BLI_time.h"
#include "BLI_vector.hh"
#include "GPU_capabilities.hh"
#include "GPU_context.hh"
#include "GPU_pass.hh"
#include "GPU_vertex_format.hh"
#include "gpu_codegen.hh"
#include <mutex>
#include <string>
using namespace blender;
using namespace blender::gpu::shader;
static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info);
/* -------------------------------------------------------------------- */
/** \name GPUPass
* \{ */
struct GPUPass {
static inline std::atomic<uint64_t> compilation_counts = 0;
GPUCodegenCreateInfo *create_info = nullptr;
BatchHandle compilation_handle = 0;
std::atomic<GPUShader *> shader = nullptr;
std::atomic<eGPUPassStatus> status = GPU_PASS_QUEUED;
  /* Orphaned GPUPasses get freed by the garbage collector. */
std::atomic<int> refcount = 1;
/* The last time the refcount was greater than 0. */
double gc_timestamp = 0.0f;
uint64_t compilation_timestamp = 0;
/** Hint that an optimized variant of this pass should be created.
* Based on a complexity heuristic from pass code generation. */
bool should_optimize = false;
bool is_optimization_pass = false;
GPUPass(GPUCodegenCreateInfo *info,
bool deferred_compilation,
bool is_optimization_pass,
bool should_optimize)
: create_info(info),
should_optimize(should_optimize),
is_optimization_pass(is_optimization_pass)
{
BLI_assert(!is_optimization_pass || !should_optimize);
if (is_optimization_pass && deferred_compilation) {
      /* Defer until all non-optimization passes are compiled. */
return;
}
GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);
if (deferred_compilation) {
compilation_handle = GPU_shader_batch_create_from_infos(
Span<GPUShaderCreateInfo *>(&base_info, 1));
}
else {
shader = GPU_shader_create_from_info(base_info);
finalize_compilation();
}
}
~GPUPass()
{
if (compilation_handle) {
GPU_shader_batch_cancel(compilation_handle);
}
else {
BLI_assert(create_info == nullptr || (is_optimization_pass && status == GPU_PASS_QUEUED));
}
MEM_delete(create_info);
GPU_SHADER_FREE_SAFE(shader);
}
void finalize_compilation()
{
BLI_assert_msg(create_info, "GPUPass::finalize_compilation() called more than once.");
if (compilation_handle) {
shader = GPU_shader_batch_finalize(compilation_handle).first();
}
compilation_timestamp = ++compilation_counts;
if (!shader && !gpu_pass_validate(create_info)) {
fprintf(stderr, "GPUShader: error: too many samplers in shader.\n");
}
status = shader ? GPU_PASS_SUCCESS : GPU_PASS_FAILED;
MEM_delete(create_info);
create_info = nullptr;
}
void update(double timestamp)
{
update_compilation();
update_gc_timestamp(timestamp);
}
void update_compilation()
{
if (compilation_handle) {
if (GPU_shader_batch_is_ready(compilation_handle)) {
finalize_compilation();
}
}
else if (status == GPU_PASS_QUEUED && refcount > 0) {
BLI_assert(is_optimization_pass);
GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);
compilation_handle = GPU_shader_batch_create_from_infos(
Span<GPUShaderCreateInfo *>(&base_info, 1));
}
}
void update_gc_timestamp(double timestamp)
{
if (refcount != 0 || gc_timestamp == 0.0f) {
gc_timestamp = timestamp;
}
}
bool should_gc(int gc_collect_rate, double timestamp)
{
BLI_assert(gc_timestamp != 0.0f);
return !compilation_handle && status != GPU_PASS_FAILED &&
(timestamp - gc_timestamp) >= gc_collect_rate;
}
};
eGPUPassStatus GPU_pass_status(GPUPass *pass)
{
return pass->status;
}
bool GPU_pass_should_optimize(GPUPass *pass)
{
/* Returns optimization heuristic prepared during
* initial codegen.
* NOTE: Only enabled on Metal, since it doesn't seem to yield any performance improvements for
* other backends. */
return (GPU_backend_get_type() == GPU_BACKEND_METAL) && pass->should_optimize;
#if 0
/* Returns optimization heuristic prepared during initial codegen.
* NOTE: Optimization limited to parallel compilation as it causes CPU stalls otherwise. */
return pass->should_optimize && GPU_use_parallel_compilation();
#endif
}
GPUShader *GPU_pass_shader_get(GPUPass *pass)
{
return pass->shader;
}
void GPU_pass_acquire(GPUPass *pass)
{
int previous_refcount = pass->refcount++;
UNUSED_VARS_NDEBUG(previous_refcount);
BLI_assert(previous_refcount > 0);
}
void GPU_pass_release(GPUPass *pass)
{
int previous_refcount = pass->refcount--;
UNUSED_VARS_NDEBUG(previous_refcount);
BLI_assert(previous_refcount > 0);
}
uint64_t GPU_pass_global_compilation_count()
{
return GPUPass::compilation_counts;
}
uint64_t GPU_pass_compilation_timestamp(GPUPass *pass)
{
return pass->compilation_timestamp;
}
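/* For illustration, a sketch of how an engine might use the compilation timestamp to detect
 * that a pass was recompiled since it was last seen (the function and the `last_seen` storage
 * are hypothetical, e.g. kept per material by the caller): */
static bool example_pass_changed_since(GPUPass *pass, uint64_t &last_seen)
{
  const uint64_t timestamp = GPU_pass_compilation_timestamp(pass);
  const bool changed = (timestamp != last_seen);
  last_seen = timestamp;
  return changed;
}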
/** \} */
/* -------------------------------------------------------------------- */
/** \name GPUPass Cache
*
 * Internal shader cache: This prevents shader recompilation / stalls when
 * using undo/redo, and also allows GPUPass reuse if the shader code is the
 * same for two different materials. Unused GPUPasses are freed by garbage collection.
* \{ */
class GPUPassCache {
  /* Number of seconds with 0 users required before garbage collecting a pass. */
static constexpr float gc_collect_rate_ = 60.0f;
/* Number of seconds without base compilations required before starting to compile optimization
* passes.*/
static constexpr float optimization_delay_ = 10.0f;
double last_base_compilation_timestamp_ = -1.0;
Map<uint32_t, std::unique_ptr<GPUPass>> passes_[GPU_MAT_ENGINE_MAX][2 /*is_optimization_pass*/];
std::mutex mutex_;
public:
void add(eGPUMaterialEngine engine,
GPUCodegen &codegen,
bool deferred_compilation,
bool is_optimization_pass)
{
std::lock_guard lock(mutex_);
passes_[engine][is_optimization_pass].add(
codegen.hash_get(),
std::make_unique<GPUPass>(codegen.create_info,
deferred_compilation,
is_optimization_pass,
codegen.should_optimize_heuristic()));
};
GPUPass *get(eGPUMaterialEngine engine,
size_t hash,
bool allow_deferred,
bool is_optimization_pass)
{
std::lock_guard lock(mutex_);
std::unique_ptr<GPUPass> *pass = passes_[engine][is_optimization_pass].lookup_ptr(hash);
if (!allow_deferred && pass && pass->get()->status == GPU_PASS_QUEUED) {
pass->get()->finalize_compilation();
}
return pass ? pass->get() : nullptr;
}
void update()
{
std::lock_guard lock(mutex_);
double timestamp = BLI_time_now_seconds();
bool base_passes_ready = true;
/* Base Passes. */
for (auto &engine_passes : passes_) {
for (std::unique_ptr<GPUPass> &pass : engine_passes[false].values()) {
pass->update(timestamp);
if (pass->status == GPU_PASS_QUEUED) {
base_passes_ready = false;
}
}
engine_passes[false].remove_if(
[&](auto item) { return item.value->should_gc(gc_collect_rate_, timestamp); });
}
/* Optimization Passes GC. */
for (auto &engine_passes : passes_) {
for (std::unique_ptr<GPUPass> &pass : engine_passes[true].values()) {
pass->update_gc_timestamp(timestamp);
}
engine_passes[true].remove_if(
/* TODO: Use lower rate for optimization passes? */
[&](auto item) { return item.value->should_gc(gc_collect_rate_, timestamp); });
}
if (!base_passes_ready) {
last_base_compilation_timestamp_ = timestamp;
return;
}
if ((timestamp - last_base_compilation_timestamp_) < optimization_delay_) {
return;
}
/* Optimization Passes Compilation. */
for (auto &engine_passes : passes_) {
for (std::unique_ptr<GPUPass> &pass : engine_passes[true].values()) {
pass->update_compilation();
}
}
}
std::mutex &get_mutex()
{
return mutex_;
}
};
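/* With the defaults above: optimization passes only start compiling once no base pass has been
 * pending for `optimization_delay_` (10) seconds, and a finalized, non-failed pass that stays at
 * zero users for `gc_collect_rate_` (60) seconds is garbage collected on a later update(). */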
static GPUPassCache *g_cache = nullptr;
void GPU_pass_ensure_its_ready(GPUPass *pass)
{
if (pass->status == GPU_PASS_QUEUED) {
std::lock_guard lock(g_cache->get_mutex());
if (pass->status == GPU_PASS_QUEUED) {
pass->finalize_compilation();
}
}
}
void GPU_pass_cache_init()
{
g_cache = MEM_new<GPUPassCache>(__func__);
}
void GPU_pass_cache_update()
{
g_cache->update();
}
void GPU_pass_cache_wait_for_all()
{
GPU_shader_batch_wait_for_all();
g_cache->update();
}
void GPU_pass_cache_free()
{
MEM_SAFE_DELETE(g_cache);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Compilation
* \{ */
static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info)
{
int samplers_len = 0;
for (const ShaderCreateInfo::Resource &res : create_info->resources_get_all_()) {
if (res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
samplers_len++;
}
}
/* Validate against GPU limit. */
if ((samplers_len > GPU_max_textures_frag()) || (samplers_len > GPU_max_textures_vert())) {
return false;
}
return (samplers_len * 2 <= GPU_max_textures());
}
GPUPass *GPU_generate_pass(GPUMaterial *material,
GPUNodeGraph *graph,
const char *debug_name,
eGPUMaterialEngine engine,
bool deferred_compilation,
GPUCodegenCallbackFn finalize_source_cb,
void *thunk,
bool optimize_graph)
{
gpu_node_graph_prune_unused(graph);
/* If Optimize flag is passed in, we are generating an optimized
* variant of the GPUMaterial's GPUPass. */
if (optimize_graph) {
gpu_node_graph_optimize(graph);
}
/* Extract attributes before compiling so the generated VBOs are ready to accept the future
* shader. */
gpu_node_graph_finalize_uniform_attrs(graph);
GPUCodegen codegen(material, graph, debug_name);
codegen.generate_graphs();
codegen.generate_cryptomatte();
GPUPass *pass = nullptr;
if (!optimize_graph) {
/* The optimized version of the shader should not re-generate a UBO.
* The UBO will not be used for this variant. */
codegen.generate_uniform_buffer();
}
/* Cache lookup: Reuse shaders already compiled. */
pass = g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
if (pass) {
pass->refcount++;
return pass;
}
/* The shader is not compiled, continue generating the shader strings. */
codegen.generate_attribs();
codegen.generate_resources();
codegen.generate_library();
/* Make engine add its own code and implement the generated functions. */
finalize_source_cb(thunk, material, &codegen.output);
codegen.create_info->finalize();
g_cache->add(engine, codegen, deferred_compilation, optimize_graph);
codegen.create_info = nullptr;
return g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
}
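/* For illustration, a sketch of a typical caller of the new signature (the callback, thunk and
 * debug name are whatever the engine passes in; see GPU_material_from_nodetree() for the real
 * call sites): */
static void example_generate_and_poll(GPUMaterial *material,
                                      GPUNodeGraph *graph,
                                      eGPUMaterialEngine engine,
                                      GPUCodegenCallbackFn engine_cb,
                                      void *engine_thunk)
{
  /* Deferred: the pass is queued through the shader batch API and finalized later by
   * GPU_pass_cache_update() (called from GPU_render_step()). */
  GPUPass *pass = GPU_generate_pass(material,
                                    graph,
                                    "example_pass",
                                    engine,
                                    /*deferred_compilation=*/true,
                                    engine_cb,
                                    engine_thunk,
                                    /*optimize_graph=*/false);

  if (GPU_pass_status(pass) == GPU_PASS_SUCCESS) {
    GPUShader *shader = GPU_pass_shader_get(pass);
    (void)shader;
  }
  /* Drop the reference when the material no longer needs the pass. */
  GPU_pass_release(pass);
}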
/** \} */

View File

@@ -387,6 +387,11 @@ void GPU_shader_batch_cancel(BatchHandle &handle)
GPUBackend::get()->get_compiler()->batch_cancel(handle);
}
void GPU_shader_batch_wait_for_all()
{
GPUBackend::get()->get_compiler()->wait_for_all();
}
void GPU_shader_compile_static()
{
printf("Compiling all static GPU shaders. This process takes a while.\n");
@@ -1029,6 +1034,7 @@ bool ShaderCompiler::batch_is_ready(BatchHandle handle)
Vector<Shader *> ShaderCompiler::batch_finalize(BatchHandle &handle)
{
std::unique_lock lock(mutex_);
/* TODO: Move to be first on the queue. */
compilation_finished_notification_.wait(lock,
[&]() { return batches_.lookup(handle)->is_ready(); });
@@ -1116,6 +1122,24 @@ void ShaderCompiler::run_thread()
}
}
void ShaderCompiler::wait_for_all()
{
std::unique_lock lock(mutex_);
compilation_finished_notification_.wait(lock, [&]() {
if (!compilation_queue_.empty()) {
return false;
}
for (Batch *batch : batches_.values()) {
if (!batch->is_ready()) {
return false;
}
}
return true;
});
}
/** \} */
} // namespace blender::gpu

View File

@@ -223,6 +223,8 @@ class ShaderCompiler {
SpecializationBatchHandle precompile_specializations(Span<ShaderSpecialization> specializations);
bool specialization_batch_is_ready(SpecializationBatchHandle &handle);
void wait_for_all();
};
enum class Severity {

View File

@@ -56,9 +56,7 @@ static int node_shader_gpu_tex_environment(GPUMaterial *mat,
GPUNodeLink *outalpha;
/* HACK(@fclem): For lookdev mode: do not compile an empty environment and just create an empty
* texture entry point. We manually bind to it after #DRW_shgroup_add_material_resources(). */
if (!ima && !GPU_material_flag_get(mat, GPU_MATFLAG_LOOKDEV_HACK)) {
if (!ima) {
return GPU_stack_link(mat, node, "node_tex_environment_empty", in, out);
}

View File

@@ -105,7 +105,7 @@
#include "GPU_context.hh"
#include "GPU_init_exit.hh"
#include "GPU_material.hh"
#include "GPU_shader.hh"
#include "COM_compositor.hh"
@@ -161,8 +161,6 @@ void WM_init_gpu()
GPU_init();
GPU_pass_cache_init();
if (G.debug & G_DEBUG_GPU_COMPILE_SHADERS) {
GPU_shader_compile_static();
}
@@ -645,7 +643,6 @@ void WM_exit_ex(bContext *C, const bool do_python_exit, const bool do_user_exit_
if (gpu_is_init) {
DRW_gpu_context_enable_ex(false);
UI_exit();
GPU_pass_cache_free();
GPU_shader_cache_dir_clear_old();
GPU_exit();
DRW_gpu_context_disable_ex(false);