diff --git a/source/blender/compositor/intern/shader_operation.cc b/source/blender/compositor/intern/shader_operation.cc index 1f3c7e8b128..0f946384b03 100644 --- a/source/blender/compositor/intern/shader_operation.cc +++ b/source/blender/compositor/intern/shader_operation.cc @@ -44,8 +44,6 @@ ShaderOperation::ShaderOperation(Context &context, { material_ = GPU_material_from_callbacks( GPU_MAT_COMPOSITOR, &construct_material, &generate_code, this); - GPU_material_status_set(material_, GPU_MAT_QUEUED); - GPU_material_compile(material_); } ShaderOperation::~ShaderOperation() diff --git a/source/blender/draw/CMakeLists.txt b/source/blender/draw/CMakeLists.txt index 923a9849268..a46dfb4fe33 100644 --- a/source/blender/draw/CMakeLists.txt +++ b/source/blender/draw/CMakeLists.txt @@ -75,7 +75,6 @@ set(SRC intern/draw_gpu_context.cc intern/draw_hair.cc intern/draw_manager.cc - intern/draw_manager_shader.cc intern/draw_manager_text.cc intern/draw_pbvh.cc intern/draw_pointcloud.cc diff --git a/source/blender/draw/DRW_engine.hh b/source/blender/draw/DRW_engine.hh index 3294322a01a..919a65cc330 100644 --- a/source/blender/draw/DRW_engine.hh +++ b/source/blender/draw/DRW_engine.hh @@ -180,9 +180,6 @@ void DRW_system_gpu_render_context_disable(void *re_system_gpu_context); void DRW_blender_gpu_render_context_enable(void *re_gpu_context); void DRW_blender_gpu_render_context_disable(void *re_gpu_context); -void DRW_deferred_shader_remove(GPUMaterial *mat); -void DRW_deferred_shader_optimize_remove(GPUMaterial *mat); - DRWData *DRW_viewport_data_create(); void DRW_viewport_data_free(DRWData *drw_data); diff --git a/source/blender/draw/engines/eevee/eevee_instance.cc b/source/blender/draw/engines/eevee/eevee_instance.cc index 8800e0e6cf2..5f969ffbc0d 100644 --- a/source/blender/draw/engines/eevee/eevee_instance.cc +++ b/source/blender/draw/engines/eevee/eevee_instance.cc @@ -25,6 +25,7 @@ #include "ED_screen.hh" #include "ED_view3d.hh" #include "GPU_context.hh" +#include "GPU_pass.hh" #include "IMB_imbuf_types.hh" #include "RE_pipeline.h" @@ -484,9 +485,12 @@ void Instance::render_sample() if (!is_viewport() && sampling.do_render_sync()) { render_sync(); while (materials.queued_shaders_count > 0) { - /* Leave some time for shaders to compile. */ - BLI_time_sleep_ms(50); - /** WORKAROUND: Re-sync to check if all shaders are already compiled. */ + GPU_pass_cache_wait_for_all(); + /** WORKAROUND: Re-sync now that all shaders are compiled. */ + /* This may need to happen more than once, since actual materials may require more passes + * (eg. volume ones) than the fallback material used for queued passes. */ + /* TODO(@pragma37): There seems to be an issue where multiple `step_object_sync` calls on the + * same step can cause mismatching `has_motion` values between sync. */ render_sync(); } } @@ -824,10 +828,13 @@ void Instance::light_bake_irradiance( custom_pipeline_wrapper([&]() { this->render_sync(); while (materials.queued_shaders_count > 0) { - /* Leave some time for shaders to compile. */ - BLI_time_sleep_ms(50); - /** WORKAROUND: Re-sync to check if all shaders are already compiled. */ - this->render_sync(); + GPU_pass_cache_wait_for_all(); + /** WORKAROUND: Re-sync now that all shaders are compiled. */ + /* This may need to happen more than once, since actual materials may require more passes + * (eg. volume ones) than the fallback material used for queued passes. 
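+ *
+ * A minimal sketch of this wait-and-resync pattern (also used in render_sample() above);
+ * `instance` is an illustrative stand-in for the EEVEE Instance, not a name from this patch:
+ *
+ *   instance.render_sync();
+ *   while (instance.materials.queued_shaders_count > 0) {
+ *     GPU_pass_cache_wait_for_all(); // Blocks until the queued pass compilations finish.
+ *     instance.render_sync();        // New passes may queue further shaders, so sync again.
+ *   }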
*/ + /* TODO(@pragma37): There seems to be an issue where multiple `step_object_sync` calls on the + * same step can cause mismatching `has_motion` values between sync. */ + render_sync(); } /* Sampling module needs to be initialized to computing lighting. */ sampling.init(probe); diff --git a/source/blender/draw/engines/eevee/eevee_lookdev.cc b/source/blender/draw/engines/eevee/eevee_lookdev.cc index 22c446fa1e1..b34abba433e 100644 --- a/source/blender/draw/engines/eevee/eevee_lookdev.cc +++ b/source/blender/draw/engines/eevee/eevee_lookdev.cc @@ -312,7 +312,7 @@ void LookdevModule::sync_pass(PassSimple &pass, const DRWState state = DRW_STATE_WRITE_COLOR | DRW_STATE_CULL_BACK; GPUMaterial *gpumat = inst_.shaders.material_shader_get( - mat, mat->nodetree, MAT_PIPE_FORWARD, MAT_GEOM_MESH, MAT_PROBE_NONE); + mat, mat->nodetree, MAT_PIPE_FORWARD, MAT_GEOM_MESH, false, inst_.materials.default_surface); pass.state_set(state); pass.material_set(*inst_.manager, gpumat); pass.bind_texture(RBUFS_UTILITY_TEX_SLOT, inst_.pipelines.utility_tx); diff --git a/source/blender/draw/engines/eevee/eevee_material.cc b/source/blender/draw/engines/eevee/eevee_material.cc index 60e295502e6..1159379661f 100644 --- a/source/blender/draw/engines/eevee/eevee_material.cc +++ b/source/blender/draw/engines/eevee/eevee_material.cc @@ -119,6 +119,12 @@ MaterialModule::MaterialModule(Instance &inst) : inst_(inst) bke::node_set_active(*ntree, *output); } + { + default_surface = reinterpret_cast<::Material *>(BKE_id_copy_ex( + nullptr, &BKE_material_default_surface()->id, nullptr, LIB_ID_COPY_LOCALIZE)); + default_volume = reinterpret_cast<::Material *>(BKE_id_copy_ex( + nullptr, &BKE_material_default_volume()->id, nullptr, LIB_ID_COPY_LOCALIZE)); + } { error_mat_ = BKE_id_new_nomain<::Material>("EEVEE default error"); bNodeTree *ntree = bke::node_tree_add_tree_embedded( @@ -146,6 +152,8 @@ MaterialModule::~MaterialModule() { BKE_id_free(nullptr, metallic_mat); BKE_id_free(nullptr, diffuse_mat); + BKE_id_free(nullptr, default_surface); + BKE_id_free(nullptr, default_volume); BKE_id_free(nullptr, error_mat_); } @@ -154,6 +162,10 @@ void MaterialModule::begin_sync() queued_shaders_count = 0; queued_optimize_shaders_count = 0; + uint64_t next_update = GPU_pass_global_compilation_count(); + gpu_pass_last_update_ = gpu_pass_next_update_; + gpu_pass_next_update_ = next_update; + material_map_.clear(); shader_map_.clear(); } @@ -174,11 +186,13 @@ MaterialPass MaterialModule::material_pass_get(Object *ob, use_deferred_compilation = false; } + const bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_OCCUPANCY, MAT_PIPE_VOLUME_MATERIAL); + ::Material *default_mat = is_volume ? 
default_volume : default_surface; + MaterialPass matpass = MaterialPass(); matpass.gpumat = inst_.shaders.material_shader_get( - blender_mat, ntree, pipeline_type, geometry_type, use_deferred_compilation); + blender_mat, ntree, pipeline_type, geometry_type, use_deferred_compilation, default_mat); - const bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_OCCUPANCY, MAT_PIPE_VOLUME_MATERIAL); const bool is_forward = ELEM(pipeline_type, MAT_PIPE_FORWARD, MAT_PIPE_PREPASS_FORWARD, @@ -196,12 +210,13 @@ MaterialPass MaterialModule::material_pass_get(Object *ob, } case GPU_MAT_QUEUED: queued_shaders_count++; - matpass.gpumat = inst_.shaders.material_default_shader_get(pipeline_type, geometry_type); + matpass.gpumat = inst_.shaders.material_shader_get( + default_mat, default_mat->nodetree, pipeline_type, geometry_type, false, nullptr); break; case GPU_MAT_FAILED: default: matpass.gpumat = inst_.shaders.material_shader_get( - error_mat_, error_mat_->nodetree, pipeline_type, geometry_type, false); + error_mat_, error_mat_->nodetree, pipeline_type, geometry_type, false, nullptr); break; } /* Returned material should be ready to be drawn. */ @@ -211,11 +226,9 @@ MaterialPass MaterialModule::material_pass_get(Object *ob, const bool is_transparent = GPU_material_flag_get(matpass.gpumat, GPU_MATFLAG_TRANSPARENT); - if (inst_.is_viewport() && use_deferred_compilation && - GPU_material_recalc_flag_get(matpass.gpumat)) - { - /* TODO(Miguel Pozo): This is broken, it consumes the flag, - * but GPUMats can be shared across viewports. */ + bool pass_updated = GPU_material_compilation_timestamp(matpass.gpumat) > gpu_pass_last_update_; + + if (inst_.is_viewport() && use_deferred_compilation && pass_updated) { inst_.sampling.reset(); const bool has_displacement = GPU_material_has_displacement_output(matpass.gpumat) && diff --git a/source/blender/draw/engines/eevee/eevee_material.hh b/source/blender/draw/engines/eevee/eevee_material.hh index f740a824091..4586ba373b7 100644 --- a/source/blender/draw/engines/eevee/eevee_material.hh +++ b/source/blender/draw/engines/eevee/eevee_material.hh @@ -352,6 +352,8 @@ class MaterialModule { public: ::Material *diffuse_mat; ::Material *metallic_mat; + ::Material *default_surface; + ::Material *default_volume; int64_t queued_shaders_count = 0; int64_t queued_optimize_shaders_count = 0; @@ -368,6 +370,9 @@ class MaterialModule { ::Material *error_mat_; + uint64_t gpu_pass_last_update_ = 0; + uint64_t gpu_pass_next_update_ = 0; + public: MaterialModule(Instance &inst); ~MaterialModule(); diff --git a/source/blender/draw/engines/eevee/eevee_shader.cc b/source/blender/draw/engines/eevee/eevee_shader.cc index ba94756b1df..f9fe31a1edd 100644 --- a/source/blender/draw/engines/eevee/eevee_shader.cc +++ b/source/blender/draw/engines/eevee/eevee_shader.cc @@ -12,6 +12,7 @@ #include "GPU_capabilities.hh" #include "BKE_material.hh" +#include "DNA_world_types.h" #include "gpu_shader_create_info.hh" @@ -916,17 +917,25 @@ void ShaderModule::material_create_info_amend(GPUMaterial *gpumat, GPUCodegenOut } } +struct CallbackThunk { + ShaderModule *shader_module; + ::Material *default_mat; +}; + /* WATCH: This can be called from another thread! Needs to not touch the shader module in any * thread unsafe manner. 
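 *
 * Both callbacks below only unpack a caller-provided CallbackThunk and forward the work;
 * a sketch of the intended cast round-trip (equivalent to the code that follows):
 *
 *   CallbackThunk *thunk = static_cast<CallbackThunk *>(void_thunk);
 *   thunk->shader_module->material_create_info_amend(mat, codegen);
 *
 * The thunk itself is built on the caller's stack in ShaderModule::material_shader_get()
 * and world_shader_get(), which pass its address as the `void *thunk` argument of
 * GPU_material_from_nodetree().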
*/ -static void codegen_callback(void *thunk, GPUMaterial *mat, GPUCodegenOutput *codegen) +static void codegen_callback(void *void_thunk, GPUMaterial *mat, GPUCodegenOutput *codegen) { - reinterpret_cast(thunk)->material_create_info_amend(mat, codegen); + CallbackThunk *thunk = static_cast(void_thunk); + thunk->shader_module->material_create_info_amend(mat, codegen); } -static GPUPass *pass_replacement_cb(void *thunk, GPUMaterial *mat) +static GPUPass *pass_replacement_cb(void *void_thunk, GPUMaterial *mat) { using namespace blender::gpu::shader; + CallbackThunk *thunk = static_cast(void_thunk); + const ::Material *blender_mat = GPU_material_get_material(mat); uint64_t shader_uuid = GPU_material_uuid_get(mat); @@ -963,100 +972,66 @@ static GPUPass *pass_replacement_cb(void *thunk, GPUMaterial *mat) (is_prepass && (!has_vertex_displacement && !has_transparency && !has_raytraced_transmission)); if (can_use_default) { - GPUMaterial *mat = reinterpret_cast(thunk)->material_default_shader_get( - pipeline_type, geometry_type); + GPUMaterial *mat = thunk->shader_module->material_shader_get(thunk->default_mat, + thunk->default_mat->nodetree, + pipeline_type, + geometry_type, + false, + nullptr); return GPU_material_get_pass(mat); } return nullptr; } -GPUMaterial *ShaderModule::material_default_shader_get(eMaterialPipeline pipeline_type, - eMaterialGeometry geometry_type) -{ - bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY); - ::Material *blender_mat = (is_volume) ? BKE_material_default_volume() : - BKE_material_default_surface(); - - return material_shader_get( - blender_mat, blender_mat->nodetree, pipeline_type, geometry_type, false); -} - GPUMaterial *ShaderModule::material_shader_get(::Material *blender_mat, bNodeTree *nodetree, eMaterialPipeline pipeline_type, eMaterialGeometry geometry_type, - bool deferred_compilation) + bool deferred_compilation, + ::Material *default_mat) { - bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY); - eMaterialDisplacement displacement_type = to_displacement_type(blender_mat->displacement_method); eMaterialThickness thickness_type = to_thickness_type(blender_mat->thickness_mode); uint64_t shader_uuid = shader_uuid_from_material_type( pipeline_type, geometry_type, displacement_type, thickness_type, blender_mat->blend_flag); - bool is_default_material = ELEM( - blender_mat, BKE_material_default_surface(), BKE_material_default_volume()); + bool is_default_material = default_mat == nullptr; + BLI_assert(blender_mat != default_mat); - GPUMaterial *mat = DRW_shader_from_material(blender_mat, - nodetree, - GPU_MAT_EEVEE, - shader_uuid, - is_volume, - deferred_compilation, - codegen_callback, - this, - is_default_material ? nullptr : pass_replacement_cb); + CallbackThunk thunk = {this, default_mat}; - return mat; + return GPU_material_from_nodetree(blender_mat, + nodetree, + &blender_mat->gpumaterial, + blender_mat->id.name, + GPU_MAT_EEVEE, + shader_uuid, + deferred_compilation, + codegen_callback, + &thunk, + is_default_material ? 
nullptr : pass_replacement_cb); } GPUMaterial *ShaderModule::world_shader_get(::World *blender_world, bNodeTree *nodetree, - eMaterialPipeline pipeline_type) + eMaterialPipeline pipeline_type, + bool deferred_compilation) { - bool is_volume = (pipeline_type == MAT_PIPE_VOLUME_MATERIAL); - bool defer_compilation = is_volume; - uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, MAT_GEOM_WORLD); - return DRW_shader_from_world(blender_world, - nodetree, - GPU_MAT_EEVEE, - shader_uuid, - is_volume, - defer_compilation, - codegen_callback, - this); -} + CallbackThunk thunk = {this, nullptr}; -GPUMaterial *ShaderModule::material_shader_get(const char *name, - ListBase &materials, - bNodeTree *nodetree, - eMaterialPipeline pipeline_type, - eMaterialGeometry geometry_type) -{ - uint64_t shader_uuid = shader_uuid_from_material_type(pipeline_type, geometry_type); - - bool is_volume = ELEM(pipeline_type, MAT_PIPE_VOLUME_MATERIAL, MAT_PIPE_VOLUME_OCCUPANCY); - - GPUMaterial *gpumat = GPU_material_from_nodetree(nullptr, - nullptr, - nodetree, - &materials, - name, - GPU_MAT_EEVEE, - shader_uuid, - is_volume, - false, - codegen_callback, - this); - GPU_material_status_set(gpumat, GPU_MAT_CREATED); - GPU_material_compile(gpumat); - /* Queue deferred material optimization. */ - DRW_shader_queue_optimize_material(gpumat); - return gpumat; + return GPU_material_from_nodetree(nullptr, + nodetree, + &blender_world->gpumaterial, + blender_world->id.name, + GPU_MAT_EEVEE, + shader_uuid, + deferred_compilation, + codegen_callback, + &thunk); } /** \} */ diff --git a/source/blender/draw/engines/eevee/eevee_shader.hh b/source/blender/draw/engines/eevee/eevee_shader.hh index a0ba809f4a3..31aeb5bb4b3 100644 --- a/source/blender/draw/engines/eevee/eevee_shader.hh +++ b/source/blender/draw/engines/eevee/eevee_shader.hh @@ -235,26 +235,16 @@ class ShaderModule { bool use_lightprobe_eval); GPUShader *static_shader_get(eShaderType shader_type); - GPUMaterial *material_default_shader_get(eMaterialPipeline pipeline_type, - eMaterialGeometry geometry_type); GPUMaterial *material_shader_get(::Material *blender_mat, bNodeTree *nodetree, eMaterialPipeline pipeline_type, eMaterialGeometry geometry_type, - bool deferred_compilation); + bool deferred_compilation, + ::Material *default_mat); GPUMaterial *world_shader_get(::World *blender_world, bNodeTree *nodetree, - eMaterialPipeline pipeline_type); - - /** - * Variation to compile a material only with a `nodetree`. Caller needs to maintain the list of - * materials and call GPU_material_free on it to update the material. 
- */ - GPUMaterial *material_shader_get(const char *name, - ListBase &materials, - bNodeTree *nodetree, - eMaterialPipeline pipeline_type, - eMaterialGeometry geometry_type); + eMaterialPipeline pipeline_type, + bool deferred_compilation); void material_create_info_amend(GPUMaterial *mat, GPUCodegenOutput *codegen); diff --git a/source/blender/draw/engines/eevee/eevee_world.cc b/source/blender/draw/engines/eevee/eevee_world.cc index 1cded52c433..991a16f6224 100644 --- a/source/blender/draw/engines/eevee/eevee_world.cc +++ b/source/blender/draw/engines/eevee/eevee_world.cc @@ -148,7 +148,7 @@ void World::sync() inst_.sampling.reset(); } - GPUMaterial *gpumat = inst_.shaders.world_shader_get(bl_world, ntree, MAT_PIPE_DEFERRED); + GPUMaterial *gpumat = inst_.shaders.world_shader_get(bl_world, ntree, MAT_PIPE_DEFERRED, false); inst_.manager->register_layer_attributes(gpumat); @@ -169,7 +169,8 @@ void World::sync_volume(const WorldHandle &world_handle) /* Only the scene world nodetree can have volume shader. */ if (world && world->nodetree && world->use_nodes) { - gpumat = inst_.shaders.world_shader_get(world, world->nodetree, MAT_PIPE_VOLUME_MATERIAL); + gpumat = inst_.shaders.world_shader_get( + world, world->nodetree, MAT_PIPE_VOLUME_MATERIAL, !inst_.is_image_render); } bool had_volume = has_volume_; diff --git a/source/blender/draw/intern/DRW_render.hh b/source/blender/draw/intern/DRW_render.hh index 65dc1388771..0e2d0a1683c 100644 --- a/source/blender/draw/intern/DRW_render.hh +++ b/source/blender/draw/intern/DRW_render.hh @@ -123,31 +123,6 @@ struct DrawEngine { }; }; -/* Shaders */ -/** IMPORTANT: Modify the currently bound context. */ -void DRW_shader_init(); -void DRW_shader_exit(); - -GPUMaterial *DRW_shader_from_world(World *wo, - bNodeTree *ntree, - eGPUMaterialEngine engine, - const uint64_t shader_id, - const bool is_volume_shader, - bool deferred, - GPUCodegenCallbackFn callback, - void *thunk); -GPUMaterial *DRW_shader_from_material( - Material *ma, - bNodeTree *ntree, - eGPUMaterialEngine engine, - const uint64_t shader_id, - const bool is_volume_shader, - bool deferred, - GPUCodegenCallbackFn callback, - void *thunk, - GPUMaterialPassReplacementCallbackFn pass_replacement_cb = nullptr); -void DRW_shader_queue_optimize_material(GPUMaterial *mat); - /* Viewport. */ /** diff --git a/source/blender/draw/intern/draw_gpu_context.cc b/source/blender/draw/intern/draw_gpu_context.cc index 30426c630dc..6b55bfa644a 100644 --- a/source/blender/draw/intern/draw_gpu_context.cc +++ b/source/blender/draw/intern/draw_gpu_context.cc @@ -150,17 +150,9 @@ void DRW_gpu_context_create() viewport_context = MEM_new(__func__); preview_context = MEM_new(__func__); - { - /** IMPORTANT: Very delicate context handling. Changing the order of context creation makes it - * crash in background mode on windows (see #136270). */ - - /* Setup compilation context. Called first as it changes the active GPUContext. */ - DRW_shader_init(); - - /* Some part of the code assumes no context is left bound. */ - GPU_context_active_set(nullptr); - WM_system_gpu_context_release(preview_context->system_gpu_context_); - } + /* Some part of the code assumes no context is left bound. */ + GPU_context_active_set(nullptr); + WM_system_gpu_context_release(preview_context->system_gpu_context_); /* Activate the window's context if any. 
*/ wm_window_reset_drawable(); @@ -172,7 +164,6 @@ void DRW_gpu_context_destroy() if (viewport_context == nullptr) { return; } - DRW_shader_exit(); DRW_submission_mutex_exit(); MEM_SAFE_DELETE(viewport_context); diff --git a/source/blender/draw/intern/draw_manager_shader.cc b/source/blender/draw/intern/draw_manager_shader.cc deleted file mode 100644 index 2a319251541..00000000000 --- a/source/blender/draw/intern/draw_manager_shader.cc +++ /dev/null @@ -1,435 +0,0 @@ -/* SPDX-FileCopyrightText: 2016 Blender Authors - * - * SPDX-License-Identifier: GPL-2.0-or-later */ - -/** \file - * \ingroup draw - */ - -#include "DNA_material_types.h" -#include "DNA_world_types.h" - -#include "BLI_threads.h" -#include "BLI_time.h" - -#include "DEG_depsgraph_query.hh" - -#include "GPU_capabilities.hh" -#include "GPU_material.hh" -#include "GPU_state.hh" - -#include "WM_api.hh" - -#include "draw_context_private.hh" - -#include -#include -#include - -extern "C" char datatoc_gpu_shader_depth_only_frag_glsl[]; -extern "C" char datatoc_common_fullscreen_vert_glsl[]; - -using namespace blender; - -/* -------------------------------------------------------------------- */ -/** \name Deferred Compilation (DRW_deferred) - * - * Since compiling shader can take a long time, we do it in a non blocking - * manner in another thread. - * - * \{ */ - -struct DRWShaderCompiler { - /** Default compilation queue. */ - Vector queue; - /** Optimization queue. */ - Vector optimize_queue; - - std::mutex queue_mutex; - std::condition_variable queue_cv; - - void *system_gpu_context; - GPUContext *blender_gpu_context; - - std::atomic stop; -}; - -/** NOTE: While the `BLI_threads` API requires a List, - * we only create a single thread at application startup and delete it at exit. */ -static ListBase &compilation_threadpool() -{ - static ListBase compilation_threadpool_ = {}; - return compilation_threadpool_; -} - -static DRWShaderCompiler &compiler_data() -{ - static DRWShaderCompiler compiler_data_ = {}; - return compiler_data_; -} - -static void *drw_deferred_shader_compilation_exec(void * /*unused*/) -{ - using namespace blender; - - void *system_gpu_context = compiler_data().system_gpu_context; - GPUContext *blender_gpu_context = compiler_data().blender_gpu_context; - BLI_assert(system_gpu_context != nullptr); - BLI_assert(blender_gpu_context != nullptr); - GPU_render_begin(); - WM_system_gpu_context_activate(system_gpu_context); - GPU_context_active_set(blender_gpu_context); - - const bool use_parallel_compilation = GPU_use_parallel_compilation(); - Vector async_mats; - - while (true) { - if (compiler_data().stop) { - break; - } - - compiler_data().queue_mutex.lock(); - /* Pop last because it will be less likely to lock the main thread - * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */ - GPUMaterial *mat = compiler_data().queue.is_empty() ? nullptr : - compiler_data().queue.pop_last(); - if (mat) { - /* Avoid another thread freeing the material mid compilation. */ - GPU_material_acquire(mat); - } - compiler_data().queue_mutex.unlock(); - - if (mat) { - /* We have a new material that must be compiled, - * we either compile it directly or add it to the async compilation list. 
*/ - if (use_parallel_compilation) { - GPU_material_async_compile(mat); - async_mats.append(mat); - } - else { - GPU_material_compile(mat); - GPU_material_release(mat); - } - } - else if (!async_mats.is_empty()) { - /* (only if use_parallel_compilation == true) - * Keep querying the requested materials until all of them are ready. */ - async_mats.remove_if([](GPUMaterial *mat) { - if (GPU_material_async_try_finalize(mat)) { - GPU_material_release(mat); - return true; - } - return false; - }); - } - else { - /* Check for Material Optimization job once there are no more - * shaders to compile. */ - compiler_data().queue_mutex.lock(); - /* Pop last because it will be less likely to lock the main thread - * if all GPUMaterials are to be freed (see DRW_deferred_shader_remove()). */ - GPUMaterial *optimize_mat = compiler_data().optimize_queue.is_empty() ? - nullptr : - compiler_data().optimize_queue.pop_last(); - if (optimize_mat) { - /* Avoid another thread freeing the material during optimization. */ - GPU_material_acquire(optimize_mat); - } - compiler_data().queue_mutex.unlock(); - - if (optimize_mat) { - /* Compile optimized material shader. */ - GPU_material_optimize(optimize_mat); - GPU_material_release(optimize_mat); - } - else { - /* No more materials to optimize, or shaders to compile. */ - std::unique_lock lock(compiler_data().queue_mutex); - compiler_data().queue_cv.wait(lock); - } - } - - if (GPU_type_matches_ex(GPU_DEVICE_ANY, GPU_OS_ANY, GPU_DRIVER_ANY, GPU_BACKEND_OPENGL)) { - GPU_flush(); - } - } - - /* We have to wait until all the requested batches are ready, - * even if compiler_data().stop is true. */ - while (!async_mats.is_empty()) { - async_mats.remove_if([](GPUMaterial *mat) { - if (GPU_material_async_try_finalize(mat)) { - GPU_material_release(mat); - return true; - } - return false; - }); - } - - GPU_context_active_set(nullptr); - WM_system_gpu_context_release(system_gpu_context); - GPU_render_end(); - - return nullptr; -} - -void DRW_shader_init() -{ - if (GPU_use_main_context_workaround()) { - /* Deferred compilation is not supported. */ - return; - } - static bool initialized = false; - if (initialized) { - BLI_assert_unreachable(); - return; - } - initialized = true; - - compiler_data().stop = false; - - compiler_data().system_gpu_context = WM_system_gpu_context_create(); - compiler_data().blender_gpu_context = GPU_context_create(nullptr, - compiler_data().system_gpu_context); - - /* Some part of the code assumes no context is left bound. */ - GPU_context_active_set(nullptr); - WM_system_gpu_context_release(compiler_data().system_gpu_context); - - BLI_threadpool_init(&compilation_threadpool(), drw_deferred_shader_compilation_exec, 1); - BLI_threadpool_insert(&compilation_threadpool(), nullptr); -} - -void DRW_shader_exit() -{ - if (GPU_use_main_context_workaround()) { - /* Deferred compilation is not supported. */ - return; - } - - compiler_data().stop = true; - compiler_data().queue_cv.notify_one(); - BLI_threadpool_end(&compilation_threadpool()); - - /* Revert the queued state for the materials that has not been compiled. - * Note that this is not strictly needed since this function is called at program exit. 
*/ - { - std::scoped_lock queue_lock(compiler_data().queue_mutex); - - while (!compiler_data().queue.is_empty()) { - GPU_material_status_set(compiler_data().queue.pop_last(), GPU_MAT_CREATED); - } - while (!compiler_data().optimize_queue.is_empty()) { - GPU_material_optimization_status_set(compiler_data().optimize_queue.pop_last(), - GPU_MAT_OPTIMIZATION_READY); - } - } - - WM_system_gpu_context_activate(compiler_data().system_gpu_context); - GPU_context_active_set(compiler_data().blender_gpu_context); - GPU_context_discard(compiler_data().blender_gpu_context); - WM_system_gpu_context_dispose(compiler_data().system_gpu_context); -} - -/** - * Append either shader compilation or optimization job to deferred queue. - * We keep two separate queue's to ensure core compilations always complete before optimization. - */ -static void drw_deferred_queue_append(GPUMaterial *mat, bool is_optimization_job) -{ - std::scoped_lock queue_lock(compiler_data().queue_mutex); - - /* Add to either compilation or optimization queue. */ - if (is_optimization_job) { - BLI_assert(GPU_material_optimization_status(mat) != GPU_MAT_OPTIMIZATION_QUEUED); - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_QUEUED); - compiler_data().optimize_queue.append(mat); - } - else { - GPU_material_status_set(mat, GPU_MAT_QUEUED); - compiler_data().queue.append(mat); - } - - compiler_data().queue_cv.notify_one(); -} - -static void drw_deferred_shader_add(GPUMaterial *mat, bool deferred) -{ - if (ELEM(GPU_material_status(mat), GPU_MAT_SUCCESS, GPU_MAT_FAILED)) { - return; - } - - if (GPU_use_main_context_workaround()) { - deferred = false; - } - - if (!deferred) { - DRW_deferred_shader_remove(mat); - /* Shaders could already be compiling. Have to wait for compilation to finish. */ - while (GPU_material_status(mat) == GPU_MAT_QUEUED) { - BLI_time_sleep_ms(20); - } - if (GPU_material_status(mat) == GPU_MAT_CREATED) { - GPU_material_compile(mat); - } - return; - } - - /* Don't add material to the queue twice. */ - if (GPU_material_status(mat) == GPU_MAT_QUEUED) { - return; - } - - /* Add deferred shader compilation to queue. */ - drw_deferred_queue_append(mat, false); -} - -void DRW_deferred_shader_remove(GPUMaterial *mat) -{ - if (GPU_use_main_context_workaround()) { - /* Deferred compilation is not supported. */ - return; - } - - std::scoped_lock queue_lock(compiler_data().queue_mutex); - - /* Search for compilation job in queue. */ - if (compiler_data().queue.contains(mat)) { - compiler_data().queue.remove_first_occurrence_and_reorder(mat); - GPU_material_status_set(mat, GPU_MAT_CREATED); - } - - /* Search for optimization job in queue. */ - if (compiler_data().optimize_queue.contains(mat)) { - compiler_data().optimize_queue.remove_first_occurrence_and_reorder(mat); - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); - } -} - -void DRW_deferred_shader_optimize_remove(GPUMaterial *mat) -{ - if (GPU_use_main_context_workaround()) { - /* Deferred compilation is not supported. */ - return; - } - - std::scoped_lock queue_lock(compiler_data().queue_mutex); - - /* Search for optimization job in queue. 
*/ - if (compiler_data().optimize_queue.contains(mat)) { - compiler_data().optimize_queue.remove_first_occurrence_and_reorder(mat); - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); - } -} - -/** \} */ - -/* -------------------------------------------------------------------- */ - -/** \{ */ - -GPUMaterial *DRW_shader_from_world(World *wo, - bNodeTree *ntree, - eGPUMaterialEngine engine, - const uint64_t shader_id, - const bool is_volume_shader, - bool deferred, - GPUCodegenCallbackFn callback, - void *thunk) -{ - Scene *scene = DEG_get_original(drw_get().scene); - GPUMaterial *mat = GPU_material_from_nodetree(scene, - nullptr, - ntree, - &wo->gpumaterial, - wo->id.name, - engine, - shader_id, - is_volume_shader, - false, - callback, - thunk); - - if (DRW_context_get()->is_image_render()) { - /* Do not deferred if doing render. */ - deferred = false; - } - - drw_deferred_shader_add(mat, deferred); - DRW_shader_queue_optimize_material(mat); - return mat; -} - -GPUMaterial *DRW_shader_from_material(Material *ma, - bNodeTree *ntree, - eGPUMaterialEngine engine, - const uint64_t shader_id, - const bool is_volume_shader, - bool deferred, - GPUCodegenCallbackFn callback, - void *thunk, - GPUMaterialPassReplacementCallbackFn pass_replacement_cb) -{ - Scene *scene = DEG_get_original(drw_get().scene); - GPUMaterial *mat = GPU_material_from_nodetree(scene, - ma, - ntree, - &ma->gpumaterial, - ma->id.name, - engine, - shader_id, - is_volume_shader, - false, - callback, - thunk, - pass_replacement_cb); - - drw_deferred_shader_add(mat, deferred); - DRW_shader_queue_optimize_material(mat); - return mat; -} - -void DRW_shader_queue_optimize_material(GPUMaterial *mat) -{ - /* Do not perform deferred optimization if performing render. - * De-queue any queued optimization jobs. */ - if (DRW_context_get()->is_image_render()) { - if (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) { - /* Remove from pending optimization job queue. */ - DRW_deferred_shader_optimize_remove(mat); - /* If optimization job had already started, wait for it to complete. */ - while (GPU_material_optimization_status(mat) == GPU_MAT_OPTIMIZATION_QUEUED) { - BLI_time_sleep_ms(20); - } - } - return; - } - - /* We do not need to perform optimization on the material if it is already compiled or in the - * optimization queue. If optimization is not required, the status will be flagged as - * `GPU_MAT_OPTIMIZATION_SKIP`. - * We can also skip cases which have already been queued up. */ - if (ELEM(GPU_material_optimization_status(mat), - GPU_MAT_OPTIMIZATION_SKIP, - GPU_MAT_OPTIMIZATION_SUCCESS, - GPU_MAT_OPTIMIZATION_QUEUED)) - { - return; - } - - /* Only queue optimization once the original shader has been successfully compiled. */ - if (GPU_material_status(mat) != GPU_MAT_SUCCESS) { - return; - } - - /* Defer optimization until sufficient time has passed beyond creation. This avoids excessive - * recompilation for shaders which are being actively modified. */ - if (!GPU_material_optimization_ready(mat)) { - return; - } - - /* Add deferred shader compilation to queue. 
*/ - drw_deferred_queue_append(mat, true); -} - -/** \} */ diff --git a/source/blender/draw/intern/draw_pass.hh b/source/blender/draw/intern/draw_pass.hh index 78a155514cf..2bef688be18 100644 --- a/source/blender/draw/intern/draw_pass.hh +++ b/source/blender/draw/intern/draw_pass.hh @@ -50,6 +50,7 @@ #include "GPU_debug.hh" #include "GPU_index_buffer.hh" #include "GPU_material.hh" +#include "GPU_pass.hh" #include "DRW_gpu_wrapper.hh" @@ -59,8 +60,6 @@ #include "draw_shader_shared.hh" #include "draw_state.hh" -#include "intern/gpu_codegen.hh" - #include #include diff --git a/source/blender/editors/space_view3d/view3d_draw.cc b/source/blender/editors/space_view3d/view3d_draw.cc index 0b768c073e1..f0803918a14 100644 --- a/source/blender/editors/space_view3d/view3d_draw.cc +++ b/source/blender/editors/space_view3d/view3d_draw.cc @@ -64,7 +64,6 @@ #include "GPU_framebuffer.hh" #include "GPU_immediate.hh" #include "GPU_immediate_util.hh" -#include "GPU_material.hh" #include "GPU_matrix.hh" #include "GPU_state.hh" #include "GPU_viewport.hh" @@ -1683,7 +1682,6 @@ void view3d_main_region_draw(const bContext *C, ARegion *region) DRW_cache_free_old_subdiv(); DRW_cache_free_old_batches(bmain); BKE_image_free_old_gputextures(bmain); - GPU_pass_cache_garbage_collect(); /* No depth test for drawing action zones afterwards. */ GPU_depth_test(GPU_DEPTH_NONE); diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index b8e63251f87..b43d6c0bdb0 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -77,6 +77,7 @@ set(SRC intern/gpu_material.cc intern/gpu_matrix.cc intern/gpu_node_graph.cc + intern/gpu_pass.cc intern/gpu_platform.cc intern/gpu_query.cc intern/gpu_select.cc @@ -118,6 +119,7 @@ set(SRC GPU_init_exit.hh GPU_material.hh GPU_matrix.hh + GPU_pass.hh GPU_platform.hh GPU_platform_backend_enum.h GPU_primitive.hh diff --git a/source/blender/gpu/GPU_material.hh b/source/blender/gpu/GPU_material.hh index ef15b098c4d..84f0a65b6fb 100644 --- a/source/blender/gpu/GPU_material.hh +++ b/source/blender/gpu/GPU_material.hh @@ -33,31 +33,28 @@ struct Scene; struct bNode; struct bNodeTree; -/* Functions to create GPU Materials nodes. */ +/** + * High level functions to create and use GPU materials. + */ -enum eGPUType { - /* Keep in sync with GPU_DATATYPE_STR */ - /* The value indicates the number of elements in each type */ - GPU_NONE = 0, - GPU_FLOAT = 1, - GPU_VEC2 = 2, - GPU_VEC3 = 3, - GPU_VEC4 = 4, - GPU_MAT3 = 9, - GPU_MAT4 = 16, - GPU_MAX_CONSTANT_DATA = GPU_MAT4, +enum eGPUMaterialEngine { + GPU_MAT_EEVEE, + GPU_MAT_COMPOSITOR, + GPU_MAT_ENGINE_MAX, +}; - /* Values not in GPU_DATATYPE_STR */ - GPU_TEX1D_ARRAY = 1001, - GPU_TEX2D = 1002, - GPU_TEX2D_ARRAY = 1003, - GPU_TEX3D = 1004, +enum eGPUMaterialStatus { + GPU_MAT_FAILED = 0, + GPU_MAT_QUEUED, + GPU_MAT_SUCCESS, +}; - /* GLSL Struct types */ - GPU_CLOSURE = 1007, - - /* Opengl Attributes */ - GPU_ATTR = 3001, +/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not + * plan to perform optimization on a given material. */ +enum eGPUMaterialOptimizationStatus { + GPU_MAT_OPTIMIZATION_SKIP = 0, + GPU_MAT_OPTIMIZATION_QUEUED, + GPU_MAT_OPTIMIZATION_SUCCESS, }; enum eGPUMaterialFlag { @@ -84,12 +81,193 @@ enum eGPUMaterialFlag { /* Tells the render engine the material was just compiled or updated. */ GPU_MATFLAG_UPDATED = (1 << 29), +}; +ENUM_OPERATORS(eGPUMaterialFlag, GPU_MATFLAG_UPDATED); - /* HACK(fclem) Tells the environment texture node to not bail out if empty. 
*/ - GPU_MATFLAG_LOOKDEV_HACK = (1 << 30), +using GPUCodegenCallbackFn = void (*)(void *thunk, + GPUMaterial *mat, + struct GPUCodegenOutput *codegen); +/** + * Should return an already compiled pass if it's functionally equivalent to the one being + * compiled. + */ +using GPUMaterialPassReplacementCallbackFn = GPUPass *(*)(void *thunk, GPUMaterial *mat); + +/** WARNING: gpumaterials thread safety must be ensured by the caller. */ +GPUMaterial *GPU_material_from_nodetree( + Material *ma, + bNodeTree *ntree, + ListBase *gpumaterials, + const char *name, + eGPUMaterialEngine engine, + uint64_t shader_uuid, + bool deferred_compilation, + GPUCodegenCallbackFn callback, + void *thunk, + GPUMaterialPassReplacementCallbackFn pass_replacement_cb = nullptr); + +/* A callback passed to GPU_material_from_callbacks to construct the material graph by adding and + * linking the necessary GPU material nodes. */ +using ConstructGPUMaterialFn = void (*)(void *thunk, GPUMaterial *material); + +/* Construct a GPU material from a set of callbacks. See the callback types for more information. + * The given thunk will be passed as the first parameter of each callback. */ +GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine, + ConstructGPUMaterialFn construct_function_cb, + GPUCodegenCallbackFn generate_code_function_cb, + void *thunk); + +void GPU_material_free_single(GPUMaterial *material); +void GPU_material_free(ListBase *gpumaterial); + +void GPU_materials_free(Main *bmain); + +GPUPass *GPU_material_get_pass(GPUMaterial *material); +/** Return the most optimal shader configuration for the given material. */ +GPUShader *GPU_material_get_shader(GPUMaterial *material); + +const char *GPU_material_get_name(GPUMaterial *material); + +/** + * Return can be null if it's a world material. + */ +Material *GPU_material_get_material(GPUMaterial *material); +/** + * Return true if the material compilation has not yet begin or begin. + */ +eGPUMaterialStatus GPU_material_status(GPUMaterial *mat); + +/** + * Return status for asynchronous optimization jobs. + */ +eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat); + +uint64_t GPU_material_compilation_timestamp(GPUMaterial *mat); + +GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material); +/** + * Create dynamic UBO from parameters + * + * \param inputs: Items are #LinkData, data is #GPUInput (`BLI_genericNodeN(GPUInput)`). + */ +void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs); + +bool GPU_material_has_surface_output(GPUMaterial *mat); +bool GPU_material_has_volume_output(GPUMaterial *mat); +bool GPU_material_has_displacement_output(GPUMaterial *mat); + +bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag); + +uint64_t GPU_material_uuid_get(GPUMaterial *mat); + +struct GPULayerAttr { + GPULayerAttr *next, *prev; + + /* Meaningful part of the attribute set key. */ + char name[256]; /* Multiple MAX_CUSTOMDATA_LAYER_NAME */ + /** Hash of name[68]. */ + uint32_t hash_code; + + /* Helper fields used by code generation. 
*/ + int users; }; -ENUM_OPERATORS(eGPUMaterialFlag, GPU_MATFLAG_LOOKDEV_HACK); +const ListBase *GPU_material_layer_attributes(const GPUMaterial *material); + +/* Requested Material Attributes and Textures */ + +enum eGPUType { + /* Keep in sync with GPU_DATATYPE_STR */ + /* The value indicates the number of elements in each type */ + GPU_NONE = 0, + GPU_FLOAT = 1, + GPU_VEC2 = 2, + GPU_VEC3 = 3, + GPU_VEC4 = 4, + GPU_MAT3 = 9, + GPU_MAT4 = 16, + GPU_MAX_CONSTANT_DATA = GPU_MAT4, + + /* Values not in GPU_DATATYPE_STR */ + GPU_TEX1D_ARRAY = 1001, + GPU_TEX2D = 1002, + GPU_TEX2D_ARRAY = 1003, + GPU_TEX3D = 1004, + + /* GLSL Struct types */ + GPU_CLOSURE = 1007, + + /* Opengl Attributes */ + GPU_ATTR = 3001, +}; + +enum eGPUDefaultValue { + GPU_DEFAULT_0 = 0, + GPU_DEFAULT_1, +}; + +struct GPUMaterialAttribute { + GPUMaterialAttribute *next, *prev; + int type; /* eCustomDataType */ + char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */ + char input_name[12 + 1]; /* GPU_MAX_SAFE_ATTR_NAME + 1 */ + eGPUType gputype; + eGPUDefaultValue default_value; /* Only for volumes attributes. */ + int id; + int users; + /** + * If true, the corresponding attribute is the specified default color attribute on the mesh, + * if it exists. In that case the type and name data can vary per geometry, so it will not be + * valid here. + */ + bool is_default_color; + /** + * If true, the attribute is the length of hair particles and curves. + */ + bool is_hair_length; +}; + +struct GPUMaterialTexture { + GPUMaterialTexture *next, *prev; + Image *ima; + ImageUser iuser; + bool iuser_available; + GPUTexture **colorband; + GPUTexture **sky; + char sampler_name[32]; /* Name of sampler in GLSL. */ + char tiled_mapping_name[32]; /* Name of tile mapping sampler in GLSL. */ + int users; + GPUSamplerState sampler_state; +}; + +ListBase GPU_material_attributes(const GPUMaterial *material); +ListBase GPU_material_textures(GPUMaterial *material); + +struct GPUUniformAttr { + GPUUniformAttr *next, *prev; + + /* Meaningful part of the attribute set key. */ + char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */ + /** Hash of name[68] + use_dupli. */ + uint32_t hash_code; + bool use_dupli; + + /* Helper fields used by code generation. */ + short id; + int users; +}; + +struct GPUUniformAttrList { + ListBase list; /* GPUUniformAttr */ + + /* List length and hash code precomputed for fast lookup and comparison. */ + unsigned int count, hash_code; +}; + +const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material); + +/* Functions to create GPU Materials nodes. */ +/* TODO: Move to its own header. */ struct GPUNodeStack { eGPUType type; @@ -101,27 +279,6 @@ struct GPUNodeStack { bool end; }; -enum eGPUMaterialStatus { - GPU_MAT_FAILED = 0, - GPU_MAT_CREATED, - GPU_MAT_QUEUED, - GPU_MAT_SUCCESS, -}; - -/* GPU_MAT_OPTIMIZATION_SKIP for cases where we do not - * plan to perform optimization on a given material. */ -enum eGPUMaterialOptimizationStatus { - GPU_MAT_OPTIMIZATION_SKIP = 0, - GPU_MAT_OPTIMIZATION_READY, - GPU_MAT_OPTIMIZATION_QUEUED, - GPU_MAT_OPTIMIZATION_SUCCESS, -}; - -enum eGPUDefaultValue { - GPU_DEFAULT_0 = 0, - GPU_DEFAULT_1, -}; - struct GPUCodegenOutput { std::string attr_load; /* Node-tree functions calls. */ @@ -135,13 +292,6 @@ struct GPUCodegenOutput { GPUShaderCreateInfo *create_info; }; -using GPUCodegenCallbackFn = void (*)(void *thunk, GPUMaterial *mat, GPUCodegenOutput *codegen); -/** - * Should return an already compiled pass if it's functionally equivalent to the one being - * compiled. 
- */ -using GPUMaterialPassReplacementCallbackFn = GPUPass *(*)(void *thunk, GPUMaterial *mat); - GPUNodeLink *GPU_constant(const float *num); GPUNodeLink *GPU_uniform(const float *num); GPUNodeLink *GPU_attribute(GPUMaterial *mat, eCustomDataType type, const char *name); @@ -217,203 +367,9 @@ char *GPU_material_split_sub_function(GPUMaterial *material, eGPUType return_type, GPUNodeLink **link); -/** - * High level functions to create and use GPU materials. - */ - -enum eGPUMaterialEngine { - GPU_MAT_EEVEE_LEGACY = 0, - GPU_MAT_EEVEE, - GPU_MAT_COMPOSITOR, -}; - -GPUMaterial *GPU_material_from_nodetree( - Scene *scene, - Material *ma, - bNodeTree *ntree, - ListBase *gpumaterials, - const char *name, - eGPUMaterialEngine engine, - uint64_t shader_uuid, - bool is_volume_shader, - bool is_lookdev, - GPUCodegenCallbackFn callback, - void *thunk, - GPUMaterialPassReplacementCallbackFn pass_replacement_cb = nullptr); - -void GPU_material_compile(GPUMaterial *mat); -void GPU_material_free_single(GPUMaterial *material); -void GPU_material_free(ListBase *gpumaterial); - -void GPU_material_async_compile(GPUMaterial *mat); -/** Returns true if the material have finished its compilation. */ -bool GPU_material_async_try_finalize(GPUMaterial *mat); - -void GPU_material_acquire(GPUMaterial *mat); -void GPU_material_release(GPUMaterial *mat); - -void GPU_materials_free(Main *bmain); - -Scene *GPU_material_scene(GPUMaterial *material); -GPUPass *GPU_material_get_pass(GPUMaterial *material); -/** Return the most optimal shader configuration for the given material. */ -GPUShader *GPU_material_get_shader(GPUMaterial *material); -/** Return the base un-optimized shader. */ -GPUShader *GPU_material_get_shader_base(GPUMaterial *material); -const char *GPU_material_get_name(GPUMaterial *material); - -/** - * Material Optimization. - * \note Compiles optimal version of shader graph, populating mat->optimized_pass. - * This operation should always be deferred until existing compilations have completed. - * Default un-optimized materials will still exist for interactive material editing performance. - */ -void GPU_material_optimize(GPUMaterial *mat); - -/** - * Return can be null if it's a world material. - */ -Material *GPU_material_get_material(GPUMaterial *material); -/** - * Return true if the material compilation has not yet begin or begin. - */ -eGPUMaterialStatus GPU_material_status(GPUMaterial *mat); -void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status); - -/** - * Return status for asynchronous optimization jobs. - */ -eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat); -void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status); -bool GPU_material_optimization_ready(GPUMaterial *mat); - -/** - * Store reference to a similar default material for asynchronous PSO cache warming. - * - * This function expects `material` to have not yet been compiled and for `default_material` to be - * ready. When compiling `material` as part of an asynchronous shader compilation job, use existing - * PSO descriptors from `default_material`'s shader to also compile PSOs for this new material - * asynchronously, rather than at runtime. - * - * The default_material `options` should match this new materials options in order - * for PSO descriptors to match those needed by the new `material`. - * - * NOTE: `default_material` must exist when `GPU_material_compile(..)` is called for - * `material`. 
- * - * See `GPU_shader_warm_cache(..)` for more information. - */ -void GPU_material_set_default(GPUMaterial *material, GPUMaterial *default_material); - -GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material); -/** - * Create dynamic UBO from parameters - * - * \param inputs: Items are #LinkData, data is #GPUInput (`BLI_genericNodeN(GPUInput)`). - */ -void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs); - -bool GPU_material_has_surface_output(GPUMaterial *mat); -bool GPU_material_has_volume_output(GPUMaterial *mat); -bool GPU_material_has_displacement_output(GPUMaterial *mat); - void GPU_material_flag_set(GPUMaterial *mat, eGPUMaterialFlag flag); -bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag); eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat); -bool GPU_material_recalc_flag_get(GPUMaterial *mat); -uint64_t GPU_material_uuid_get(GPUMaterial *mat); - -void GPU_pass_cache_init(); -void GPU_pass_cache_garbage_collect(); -void GPU_pass_cache_free(); - -/* Requested Material Attributes and Textures */ - -struct GPUMaterialAttribute { - GPUMaterialAttribute *next, *prev; - int type; /* eCustomDataType */ - char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */ - char input_name[12 + 1]; /* GPU_MAX_SAFE_ATTR_NAME + 1 */ - eGPUType gputype; - eGPUDefaultValue default_value; /* Only for volumes attributes. */ - int id; - int users; - /** - * If true, the corresponding attribute is the specified default color attribute on the mesh, - * if it exists. In that case the type and name data can vary per geometry, so it will not be - * valid here. - */ - bool is_default_color; - /** - * If true, the attribute is the length of hair particles and curves. - */ - bool is_hair_length; -}; - -struct GPUMaterialTexture { - GPUMaterialTexture *next, *prev; - Image *ima; - ImageUser iuser; - bool iuser_available; - GPUTexture **colorband; - GPUTexture **sky; - char sampler_name[32]; /* Name of sampler in GLSL. */ - char tiled_mapping_name[32]; /* Name of tile mapping sampler in GLSL. */ - int users; - GPUSamplerState sampler_state; -}; - -ListBase GPU_material_attributes(const GPUMaterial *material); -ListBase GPU_material_textures(GPUMaterial *material); - -struct GPUUniformAttr { - GPUUniformAttr *next, *prev; - - /* Meaningful part of the attribute set key. */ - char name[68]; /* MAX_CUSTOMDATA_LAYER_NAME */ - /** Hash of name[68] + use_dupli. */ - uint32_t hash_code; - bool use_dupli; - - /* Helper fields used by code generation. */ - short id; - int users; -}; - -struct GPUUniformAttrList { - ListBase list; /* GPUUniformAttr */ - - /* List length and hash code precomputed for fast lookup and comparison. */ - unsigned int count, hash_code; -}; - -const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material); GHash *GPU_uniform_attr_list_hash_new(const char *info); void GPU_uniform_attr_list_copy(GPUUniformAttrList *dest, const GPUUniformAttrList *src); void GPU_uniform_attr_list_free(GPUUniformAttrList *set); - -struct GPULayerAttr { - GPULayerAttr *next, *prev; - - /* Meaningful part of the attribute set key. */ - char name[256]; /* Multiple MAX_CUSTOMDATA_LAYER_NAME */ - /** Hash of name[68]. */ - uint32_t hash_code; - - /* Helper fields used by code generation. */ - int users; -}; - -const ListBase *GPU_material_layer_attributes(const GPUMaterial *material); - -/* A callback passed to GPU_material_from_callbacks to construct the material graph by adding and - * linking the necessary GPU material nodes. 
*/ -using ConstructGPUMaterialFn = void (*)(void *thunk, GPUMaterial *material); - -/* Construct a GPU material from a set of callbacks. See the callback types for more information. - * The given thunk will be passed as the first parameter of each callback. */ -GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine, - ConstructGPUMaterialFn construct_function_cb, - GPUCodegenCallbackFn generate_code_function_cb, - void *thunk); diff --git a/source/blender/gpu/GPU_pass.hh b/source/blender/gpu/GPU_pass.hh new file mode 100644 index 00000000000..373d7b39473 --- /dev/null +++ b/source/blender/gpu/GPU_pass.hh @@ -0,0 +1,48 @@ +/* SPDX-FileCopyrightText: 2025 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Generate and cache shaders generated from the intermediate node graph. + */ + +#pragma once + +#include "GPU_material.hh" +#include "GPU_shader.hh" + +struct GPUNodeGraph; + +struct GPUPass; + +enum eGPUPassStatus { + GPU_PASS_FAILED = 0, + GPU_PASS_QUEUED, + GPU_PASS_SUCCESS, +}; + +GPUPass *GPU_generate_pass(GPUMaterial *material, + GPUNodeGraph *graph, + const char *debug_name, + eGPUMaterialEngine engine, + bool deferred_compilation, + GPUCodegenCallbackFn finalize_source_cb, + void *thunk, + bool optimize_graph); + +eGPUPassStatus GPU_pass_status(GPUPass *pass); +bool GPU_pass_should_optimize(GPUPass *pass); +void GPU_pass_ensure_its_ready(GPUPass *pass); +GPUShader *GPU_pass_shader_get(GPUPass *pass); +void GPU_pass_acquire(GPUPass *pass); +void GPU_pass_release(GPUPass *pass); + +uint64_t GPU_pass_global_compilation_count(); +uint64_t GPU_pass_compilation_timestamp(GPUPass *pass); + +void GPU_pass_cache_init(); +void GPU_pass_cache_update(); +void GPU_pass_cache_wait_for_all(); +void GPU_pass_cache_free(); diff --git a/source/blender/gpu/GPU_shader.hh b/source/blender/gpu/GPU_shader.hh index 82fb1f9ca52..107b999d4ed 100644 --- a/source/blender/gpu/GPU_shader.hh +++ b/source/blender/gpu/GPU_shader.hh @@ -105,6 +105,10 @@ blender::Vector GPU_shader_batch_finalize(BatchHandle &handle); * WARNING: The handle will be invalidated by this call. */ void GPU_shader_batch_cancel(BatchHandle &handle); +/** + * Wait until all the requested batches have been compiled. + */ +void GPU_shader_batch_wait_for_all(); /** \} */ diff --git a/source/blender/gpu/intern/gpu_codegen.cc b/source/blender/gpu/intern/gpu_codegen.cc index 57baba26e06..4a0f993aa08 100644 --- a/source/blender/gpu/intern/gpu_codegen.cc +++ b/source/blender/gpu/intern/gpu_codegen.cc @@ -10,184 +10,30 @@ #include "MEM_guardedalloc.h" -#include "DNA_customdata_types.h" #include "DNA_material_types.h" -#include "BLI_ghash.h" -#include "BLI_hash_mm2a.hh" -#include "BLI_link_utils.h" -#include "BLI_listbase.h" #include "BLI_span.hh" #include "BLI_string.h" -#include "BLI_threads.h" -#include "BLI_time.h" +#include "BLI_vector.hh" #include "BKE_cryptomatte.hh" -#include "BKE_material.hh" #include "IMB_colormanagement.hh" #include "GPU_capabilities.hh" -#include "GPU_context.hh" -#include "GPU_material.hh" #include "GPU_shader.hh" #include "GPU_uniform_buffer.hh" #include "GPU_vertex_format.hh" -#include "BLI_sys_types.h" /* for intptr_t support */ -#include "BLI_vector.hh" - #include "gpu_codegen.hh" -#include "gpu_node_graph.hh" -#include "gpu_shader_create_info.hh" #include "gpu_shader_dependency_private.hh" #include #include -#include -#include - +using namespace blender; using namespace blender::gpu::shader; -/** - * IMPORTANT: Never add external reference. 
The GPUMaterial used to create the GPUPass (and its - * GPUCodegenCreateInfo) can be free before actually compiling. This happens if there is an update - * before deferred compilation happens and the GPUPass gets picked up by another GPUMaterial - * (because of GPUPass reuse). - */ -struct GPUCodegenCreateInfo : ShaderCreateInfo { - struct NameBuffer { - using NameEntry = std::array; - - /** Duplicate attribute names to avoid reference the GPUNodeGraph directly. */ - char attr_names[16][GPU_MAX_SAFE_ATTR_NAME + 1]; - char var_names[16][8]; - blender::Vector, 16> sampler_names; - - /* Returns the appended name memory location */ - const char *append_sampler_name(const char name[32]) - { - auto index = sampler_names.size(); - sampler_names.append(std::make_unique()); - char *name_buffer = sampler_names[index]->data(); - memcpy(name_buffer, name, 32); - return name_buffer; - } - }; - - /** Optional generated interface. */ - StageInterfaceInfo *interface_generated = nullptr; - /** Optional name buffer containing names referenced by StringRefNull. */ - NameBuffer name_buffer; - - GPUCodegenCreateInfo(const char *name) : ShaderCreateInfo(name){}; - ~GPUCodegenCreateInfo() - { - delete interface_generated; - }; -}; - -struct GPUPass { - GPUPass *next = nullptr; - - GPUShader *shader = nullptr; - GPUCodegenCreateInfo *create_info = nullptr; - /** Orphaned GPUPasses gets freed by the garbage collector. */ - uint refcount = 0; - /** The last time the refcount was greater than 0. */ - int gc_timestamp = 0; - /** The engine type this pass is compiled for. */ - eGPUMaterialEngine engine = GPU_MAT_EEVEE_LEGACY; - /** Identity hash generated from all GLSL code. */ - uint32_t hash = 0; - /** Did we already tried to compile the attached GPUShader. */ - bool compiled = false; - /** If this pass is already being_compiled (A GPUPass can be shared by multiple GPUMaterials). */ - bool compilation_requested = false; - /** Hint that an optimized variant of this pass should be created based on a complexity heuristic - * during pass code generation. */ - bool should_optimize = false; - /** Whether pass is in the GPUPass cache. */ - bool cached = false; - /** Protects pass shader from being created from multiple threads at the same time. */ - ThreadMutex shader_creation_mutex = {}; - - BatchHandle async_compilation_handle = {}; -}; - -/* -------------------------------------------------------------------- */ -/** \name GPUPass Cache - * - * Internal shader cache: This prevent the shader recompilation / stall when - * using undo/redo AND also allows for GPUPass reuse if the Shader code is the - * same for 2 different Materials. Unused GPUPasses are free by Garbage collection. - * \{ */ - -/* Only use one linklist that contains the GPUPasses grouped by hash. */ -static GPUPass *pass_cache = nullptr; -static SpinLock pass_cache_spin; - -/* Search by hash only. Return first pass with the same hash. - * There is hash collision if (pass->next && pass->next->hash == hash) */ -static GPUPass *gpu_pass_cache_lookup(eGPUMaterialEngine engine, uint32_t hash) -{ - BLI_spin_lock(&pass_cache_spin); - /* Could be optimized with a Lookup table. 
*/ - for (GPUPass *pass = pass_cache; pass; pass = pass->next) { - if (pass->hash == hash && pass->engine == engine) { - BLI_spin_unlock(&pass_cache_spin); - return pass; - } - } - BLI_spin_unlock(&pass_cache_spin); - return nullptr; -} - -static void gpu_pass_cache_insert_after(GPUPass *node, GPUPass *pass) -{ - BLI_spin_lock(&pass_cache_spin); - pass->cached = true; - if (node != nullptr) { - /* Add after the first pass having the same hash. */ - pass->next = node->next; - node->next = pass; - } - else { - /* No other pass have same hash, just prepend to the list. */ - BLI_LINKS_PREPEND(pass_cache, pass); - } - BLI_spin_unlock(&pass_cache_spin); -} - -/* Check all possible passes with the same hash. */ -static GPUPass *gpu_pass_cache_resolve_collision(GPUPass *pass, - GPUShaderCreateInfo *info, - uint32_t hash) -{ - eGPUMaterialEngine engine = pass->engine; - BLI_spin_lock(&pass_cache_spin); - for (; pass && (pass->hash == hash); pass = pass->next) { - if (*reinterpret_cast(info) == - *reinterpret_cast(pass->create_info) && - pass->engine == engine) - { - BLI_spin_unlock(&pass_cache_spin); - return pass; - } - } - BLI_spin_unlock(&pass_cache_spin); - return nullptr; -} - -static bool gpu_pass_is_valid(const GPUPass *pass) -{ - /* Shader is not null if compilation is successful. */ - return (pass->compiled == false || pass->shader != nullptr); -} - -/** \} */ - /* -------------------------------------------------------------------- */ /** \name Type > string conversion * \{ */ @@ -234,12 +80,12 @@ static std::ostream &operator<<(std::ostream &stream, const GPUOutput *output) } /* Print data constructor (i.e: vec2(1.0f, 1.0f)). */ -static std::ostream &operator<<(std::ostream &stream, const blender::Span &span) +static std::ostream &operator<<(std::ostream &stream, const Span &span) { stream << (eGPUType)span.size() << "("; /* Use uint representation to allow exact same bit pattern even if NaN. This is * because we can pass UINTs as floats for constants. */ - const blender::Span uint_span = span.cast(); + const Span uint_span = span.cast(); for (const uint32_t &element : uint_span) { char formatted_float[32]; SNPRINTF(formatted_float, "uintBitsToFloat(%uu)", element); @@ -257,84 +103,57 @@ struct GPUConstant : public GPUInput {}; static std::ostream &operator<<(std::ostream &stream, const GPUConstant *input) { - stream << blender::Span(input->vec, input->type); + stream << Span(input->vec, input->type); return stream; } +namespace blender::gpu::shader { +/* Needed to use the << operators from nested namespaces. 
:( + * https://stackoverflow.com/questions/5195512/namespaces-and-operator-resolution */ +using ::operator<<; +} // namespace blender::gpu::shader + /** \} */ /* -------------------------------------------------------------------- */ /** \name GLSL code generation * \{ */ -class GPUCodegen { - public: - GPUMaterial &mat; - GPUNodeGraph &graph; - GPUCodegenOutput output = {}; - GPUCodegenCreateInfo *create_info = nullptr; +const char *GPUCodegenCreateInfo::NameBuffer::append_sampler_name(const char name[32]) +{ + auto index = sampler_names.size(); + sampler_names.append(std::make_unique()); + char *name_buffer = sampler_names[index]->data(); + memcpy(name_buffer, name, 32); + return name_buffer; +} - private: - uint32_t hash_ = 0; - BLI_HashMurmur2A hm2a_; - ListBase ubo_inputs_ = {nullptr, nullptr}; - GPUInput *cryptomatte_input_ = nullptr; +GPUCodegen::GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_, const char *debug_name) + : mat(*mat_), graph(*graph_) +{ + BLI_hash_mm2a_init(&hm2a_, GPU_material_uuid_get(&mat)); + BLI_hash_mm2a_add_int(&hm2a_, GPU_material_flag(&mat)); + create_info = MEM_new(__func__, debug_name); + output.create_info = reinterpret_cast( + static_cast(create_info)); +} - /** Cache parameters for complexity heuristic. */ - uint nodes_total_ = 0; - uint textures_total_ = 0; - uint uniforms_total_ = 0; - - public: - GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_) : mat(*mat_), graph(*graph_) - { - BLI_hash_mm2a_init(&hm2a_, GPU_material_uuid_get(&mat)); - BLI_hash_mm2a_add_int(&hm2a_, GPU_material_flag(&mat)); - create_info = new GPUCodegenCreateInfo("codegen"); - output.create_info = reinterpret_cast( - static_cast(create_info)); - } - - ~GPUCodegen() - { - MEM_SAFE_FREE(cryptomatte_input_); - delete create_info; - BLI_freelistN(&ubo_inputs_); - }; - - void generate_graphs(); - void generate_cryptomatte(); - void generate_uniform_buffer(); - void generate_attribs(); - void generate_resources(); - void generate_library(); - - uint32_t hash_get() const - { - return hash_; - } - - /* Heuristic determined during pass codegen for whether a - * more optimal variant of this material should be compiled. */ - bool should_optimize_heuristic() const - { - /* If each of the maximal attributes are exceeded, we can optimize, but we should also ensure - * the baseline is met. */ - bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) && - (textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4); - return do_optimize; - } - - private: - void set_unique_ids(); - - void node_serialize(std::stringstream &eval_ss, const GPUNode *node); - std::string graph_serialize(eGPUNodeTag tree_tag, - GPUNodeLink *output_link, - const char *output_default = nullptr); - std::string graph_serialize(eGPUNodeTag tree_tag); +GPUCodegen::~GPUCodegen() +{ + MEM_SAFE_FREE(cryptomatte_input_); + MEM_delete(create_info); + BLI_freelistN(&ubo_inputs_); }; +bool GPUCodegen::should_optimize_heuristic() const +{ + /* If each of the maximal attributes are exceeded, we can optimize, but we should also ensure + * the baseline is met. 
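+   * e.g. a graph with 70 nodes, 2 textures and 12 uniforms exceeds the node threshold
+   * and meets the baseline, so it is flagged; a 3-node graph is never flagged because
+   * it fails the nodes_total_ >= 4 baseline.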
*/ + bool do_optimize = (nodes_total_ >= 60 || textures_total_ >= 4 || uniforms_total_ >= 64) && + (textures_total_ >= 1 && uniforms_total_ >= 8 && nodes_total_ >= 4); + return do_optimize; +} + void GPUCodegen::generate_attribs() { if (BLI_listbase_is_empty(&graph.attributes)) { @@ -344,7 +163,7 @@ void GPUCodegen::generate_attribs() GPUCodegenCreateInfo &info = *create_info; - info.interface_generated = new StageInterfaceInfo("codegen_iface", "var_attrs"); + info.interface_generated = MEM_new(__func__, "codegen_iface", "var_attrs"); StageInterfaceInfo &iface = *info.interface_generated; info.vertex_out(iface); @@ -360,8 +179,8 @@ void GPUCodegen::generate_attribs() STRNCPY(info.name_buffer.attr_names[slot], attr->input_name); SNPRINTF(info.name_buffer.var_names[slot], "v%d", attr->id); - blender::StringRefNull attr_name = info.name_buffer.attr_names[slot]; - blender::StringRefNull var_name = info.name_buffer.var_names[slot]; + StringRefNull attr_name = info.name_buffer.attr_names[slot]; + StringRefNull var_name = info.name_buffer.var_names[slot]; eGPUType input_type, iface_type; @@ -470,7 +289,7 @@ void GPUCodegen::generate_library() GPUCodegenCreateInfo &info = *create_info; void *value; - blender::Vector source_files; + Vector source_files; /* Iterate over libraries. We need to keep this struct intact in case it is required for the * optimization pass. The first pass just collects the keys from the GSET, given items in a GSET @@ -543,7 +362,7 @@ void GPUCodegen::node_serialize(std::stringstream &eval_ss, const GPUNode *node) if (from == GPU_VEC4 && to == GPU_FLOAT) { float coefficients[3]; IMB_colormanagement_get_luminance_coefficients(coefficients); - eval_ss << ", " << blender::Span(coefficients, 3); + eval_ss << ", " << Span(coefficients, 3); } eval_ss << ")"; @@ -628,8 +447,8 @@ void GPUCodegen::generate_cryptomatte() float material_hash = 0.0f; Material *material = GPU_material_get_material(&mat); if (material) { - blender::bke::cryptomatte::CryptomatteHash hash( - material->id.name + 2, BLI_strnlen(material->id.name + 2, MAX_NAME - 2)); + bke::cryptomatte::CryptomatteHash hash(material->id.name + 2, + BLI_strnlen(material->id.name + 2, MAX_NAME - 2)); material_hash = hash.float_encoded(); } cryptomatte_input_->vec[0] = material_hash; @@ -711,355 +530,3 @@ void GPUCodegen::generate_graphs() } /** \} */ - -/* -------------------------------------------------------------------- */ -/** \name GPUPass - * \{ */ - -GPUPass *GPU_generate_pass(GPUMaterial *material, - GPUNodeGraph *graph, - eGPUMaterialEngine engine, - GPUCodegenCallbackFn finalize_source_cb, - void *thunk, - bool optimize_graph) -{ - gpu_node_graph_prune_unused(graph); - - /* If Optimize flag is passed in, we are generating an optimized - * variant of the GPUMaterial's GPUPass. */ - if (optimize_graph) { - gpu_node_graph_optimize(graph); - } - - /* Extract attributes before compiling so the generated VBOs are ready to accept the future - * shader. */ - gpu_node_graph_finalize_uniform_attrs(graph); - - GPUCodegen codegen(material, graph); - codegen.generate_graphs(); - codegen.generate_cryptomatte(); - - GPUPass *pass_hash = nullptr; - - if (!optimize_graph) { - /* The optimized version of the shader should not re-generate a UBO. - * The UBO will not be used for this variant. */ - codegen.generate_uniform_buffer(); - - /** Cache lookup: Reuse shaders already compiled. 
- * NOTE: We only perform cache look-up for non-optimized shader - * graphs, as baked constant data among other optimizations will generate too many - * shader source permutations, with minimal re-usability. */ - pass_hash = gpu_pass_cache_lookup(engine, codegen.hash_get()); - - /* FIXME(fclem): This is broken. Since we only check for the hash and not the full source - * there is no way to have a collision currently. Some advocated to only use a bigger hash. */ - if (pass_hash && (pass_hash->next == nullptr || pass_hash->next->hash != codegen.hash_get())) { - if (!gpu_pass_is_valid(pass_hash)) { - /* Shader has already been created but failed to compile. */ - return nullptr; - } - /* No collision, just return the pass. */ - BLI_spin_lock(&pass_cache_spin); - pass_hash->refcount += 1; - BLI_spin_unlock(&pass_cache_spin); - return pass_hash; - } - } - - /* Either the shader is not compiled or there is a hash collision... - * continue generating the shader strings. */ - codegen.generate_attribs(); - codegen.generate_resources(); - codegen.generate_library(); - - /* Make engine add its own code and implement the generated functions. */ - finalize_source_cb(thunk, material, &codegen.output); - - GPUPass *pass = nullptr; - if (pass_hash) { - /* Cache lookup: Reuse shaders already compiled. */ - pass = gpu_pass_cache_resolve_collision( - pass_hash, codegen.output.create_info, codegen.hash_get()); - } - - if (pass) { - /* Cache hit. Reuse the same GPUPass and GPUShader. */ - if (!gpu_pass_is_valid(pass)) { - /* Shader has already been created but failed to compile. */ - return nullptr; - } - BLI_spin_lock(&pass_cache_spin); - pass->refcount += 1; - BLI_spin_unlock(&pass_cache_spin); - } - else { - /* We still create a pass even if shader compilation - * fails to avoid trying to compile again and again. */ - pass = MEM_new("GPUPass"); - pass->shader = nullptr; - pass->refcount = 1; - pass->create_info = codegen.create_info; - /* Finalize before adding the pass to the cache, to prevent race conditions. */ - pass->create_info->finalize(); - pass->engine = engine; - pass->hash = codegen.hash_get(); - pass->compiled = false; - pass->compilation_requested = false; - pass->cached = false; - /* Only flag pass optimization hint if this is the first generated pass for a material. - * Optimized passes cannot be optimized further, even if the heuristic is still not - * favorable. */ - pass->should_optimize = (!optimize_graph) && codegen.should_optimize_heuristic(); - pass->async_compilation_handle = -1; - BLI_mutex_init(&pass->shader_creation_mutex); - - codegen.create_info = nullptr; - - /* Only insert non-optimized graphs into cache. - * Optimized graphs will continuously be recompiled with new unique source during material - * editing, and thus causing the cache to fill up quickly with materials offering minimal - * re-use. */ - if (!optimize_graph) { - gpu_pass_cache_insert_after(pass_hash, pass); - } - } - return pass; -} - -bool GPU_pass_should_optimize(GPUPass *pass) -{ - /* Returns optimization heuristic prepared during - * initial codegen. - * NOTE: Optimization currently limited to Metal backend as repeated compilations required for - * material specialization cause impactful CPU stalls on OpenGL platforms. 
*/ - return (GPU_backend_get_type() == GPU_BACKEND_METAL) && pass->should_optimize; -} - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Compilation - * \{ */ - -static int count_active_texture_sampler(GPUPass *pass, GPUShader *shader) -{ - int num_samplers = 0; - - for (const ShaderCreateInfo::Resource &res : pass->create_info->pass_resources_) { - if (res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) { - if (GPU_shader_get_uniform(shader, res.sampler.name.c_str()) != -1) { - num_samplers += 1; - } - } - } - - return num_samplers; -} - -static bool gpu_pass_shader_validate(GPUPass *pass, GPUShader *shader) -{ - if (shader == nullptr) { - return false; - } - - /* NOTE: The only drawback of this method is that it will count a sampler - * used in the fragment shader and only declared (but not used) in the vertex - * shader as used by both. But this corner case is not happening for now. */ - int active_samplers_len = count_active_texture_sampler(pass, shader); - - /* Validate against opengl limit. */ - if ((active_samplers_len > GPU_max_textures_frag()) || - (active_samplers_len > GPU_max_textures_vert())) - { - return false; - } - - if (pass->create_info->geometry_source_.is_empty() == false) { - if (active_samplers_len > GPU_max_textures_geom()) { - return false; - } - } - - return (active_samplers_len * 3 <= GPU_max_textures()); -} - -GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname) -{ - if (!pass->compilation_requested) { - pass->compilation_requested = true; - pass->create_info->name_ = shname; - GPUShaderCreateInfo *info = reinterpret_cast( - static_cast(pass->create_info)); - return info; - } - return nullptr; -} - -bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader) -{ - bool success = true; - if (!pass->compiled) { - /* NOTE: Some drivers / gpu allows more active samplers than the opengl limit. - * We need to make sure to count active samplers to avoid undefined behavior. */ - if (!gpu_pass_shader_validate(pass, shader)) { - success = false; - if (shader != nullptr) { - fprintf(stderr, "GPUShader: error: too many samplers in shader.\n"); - GPU_shader_free(shader); - shader = nullptr; - } - } - pass->shader = shader; - pass->compiled = true; - } - return success; -} - -void GPU_pass_begin_async_compilation(GPUPass *pass, const char *shname) -{ - BLI_mutex_lock(&pass->shader_creation_mutex); - - if (pass->async_compilation_handle == -1) { - if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) { - pass->async_compilation_handle = GPU_shader_batch_create_from_infos({info}); - } - else { - /* The pass has been already compiled synchronously. 
*/ - BLI_assert(pass->compiled); - pass->async_compilation_handle = 0; - } - } - - BLI_mutex_unlock(&pass->shader_creation_mutex); -} - -bool GPU_pass_async_compilation_try_finalize(GPUPass *pass) -{ - BLI_mutex_lock(&pass->shader_creation_mutex); - - BLI_assert(pass->async_compilation_handle != -1); - if (pass->async_compilation_handle) { - if (GPU_shader_batch_is_ready(pass->async_compilation_handle)) { - GPU_pass_finalize_compilation( - pass, GPU_shader_batch_finalize(pass->async_compilation_handle).first()); - } - } - - BLI_mutex_unlock(&pass->shader_creation_mutex); - - return pass->async_compilation_handle == 0; -} - -bool GPU_pass_compile(GPUPass *pass, const char *shname) -{ - BLI_mutex_lock(&pass->shader_creation_mutex); - - bool success = true; - if (pass->async_compilation_handle > 0) { - /* We're trying to compile this pass synchronously, but there's a pending asynchronous - * compilation already started. */ - success = GPU_pass_finalize_compilation( - pass, GPU_shader_batch_finalize(pass->async_compilation_handle).first()); - } - else if (GPUShaderCreateInfo *info = GPU_pass_begin_compilation(pass, shname)) { - GPUShader *shader = GPU_shader_create_from_info(info); - success = GPU_pass_finalize_compilation(pass, shader); - } - - BLI_mutex_unlock(&pass->shader_creation_mutex); - return success; -} - -GPUShader *GPU_pass_shader_get(GPUPass *pass) -{ - return pass->shader; -} - -static void gpu_pass_free(GPUPass *pass) -{ - BLI_assert(pass->refcount == 0); - BLI_mutex_end(&pass->shader_creation_mutex); - if (pass->shader) { - GPU_shader_free(pass->shader); - } - delete pass->create_info; - MEM_delete(pass); -} - -void GPU_pass_acquire(GPUPass *pass) -{ - BLI_spin_lock(&pass_cache_spin); - BLI_assert(pass->refcount > 0); - pass->refcount++; - BLI_spin_unlock(&pass_cache_spin); -} - -void GPU_pass_release(GPUPass *pass) -{ - BLI_spin_lock(&pass_cache_spin); - BLI_assert(pass->refcount > 0); - pass->refcount--; - /* Un-cached passes will not be filtered by garbage collection, so release here. */ - if (pass->refcount == 0 && !pass->cached) { - gpu_pass_free(pass); - } - BLI_spin_unlock(&pass_cache_spin); -} - -void GPU_pass_cache_garbage_collect() -{ - const int shadercollectrate = 60; /* hardcoded for now. 
*/ - int ctime = int(BLI_time_now_seconds()); - - BLI_spin_lock(&pass_cache_spin); - GPUPass *next, **prev_pass = &pass_cache; - for (GPUPass *pass = pass_cache; pass; pass = next) { - next = pass->next; - if (pass->refcount > 0) { - pass->gc_timestamp = ctime; - } - else if (pass->gc_timestamp + shadercollectrate < ctime) { - /* Remove from list */ - *prev_pass = next; - gpu_pass_free(pass); - continue; - } - prev_pass = &pass->next; - } - BLI_spin_unlock(&pass_cache_spin); -} - -void GPU_pass_cache_init() -{ - BLI_spin_init(&pass_cache_spin); -} - -void GPU_pass_cache_free() -{ - BLI_spin_lock(&pass_cache_spin); - while (pass_cache) { - GPUPass *next = pass_cache->next; - gpu_pass_free(pass_cache); - pass_cache = next; - } - BLI_spin_unlock(&pass_cache_spin); - - BLI_spin_end(&pass_cache_spin); -} - -/** \} */ - -/* -------------------------------------------------------------------- */ -/** \name Module - * \{ */ - -void gpu_codegen_init() {} - -void gpu_codegen_exit() -{ - BKE_material_defaults_free_gpu(); - GPU_shader_free_builtin_shaders(); -} - -/** \} */ diff --git a/source/blender/gpu/intern/gpu_codegen.hh b/source/blender/gpu/intern/gpu_codegen.hh index d4a1ea56397..f107da1ca4a 100644 --- a/source/blender/gpu/intern/gpu_codegen.hh +++ b/source/blender/gpu/intern/gpu_codegen.hh @@ -10,39 +10,97 @@ #pragma once +#include "BLI_hash_mm2a.hh" +#include "BLI_listbase.h" +#include "BLI_vector.hh" + #include "GPU_material.hh" -#include "GPU_shader.hh" +#include "GPU_vertex_format.hh" +#include "gpu_node_graph.hh" +#include "gpu_shader_create_info.hh" -struct GPUNodeGraph; +#include +#include -struct GPUPass; +namespace blender::gpu::shader { -/* Pass */ +struct GPUCodegenCreateInfo : ShaderCreateInfo { + struct NameBuffer { + using NameEntry = std::array; -GPUPass *GPU_generate_pass(GPUMaterial *material, - GPUNodeGraph *graph, - eGPUMaterialEngine engine, - GPUCodegenCallbackFn finalize_source_cb, - void *thunk, - bool optimize_graph); -GPUShader *GPU_pass_shader_get(GPUPass *pass); -bool GPU_pass_compile(GPUPass *pass, const char *shname); -void GPU_pass_acquire(GPUPass *pass); -void GPU_pass_release(GPUPass *pass); -bool GPU_pass_should_optimize(GPUPass *pass); + /** Duplicate attribute names to avoid reference the GPUNodeGraph directly. */ + char attr_names[16][GPU_MAX_SAFE_ATTR_NAME + 1]; + char var_names[16][8]; + Vector, 16> sampler_names; -/* Custom pass compilation. */ + /* Returns the appended name memory location */ + const char *append_sampler_name(const char name[32]); + }; -GPUShaderCreateInfo *GPU_pass_begin_compilation(GPUPass *pass, const char *shname); -bool GPU_pass_finalize_compilation(GPUPass *pass, GPUShader *shader); + /** Optional generated interface. */ + StageInterfaceInfo *interface_generated = nullptr; + /** Optional name buffer containing names referenced by StringRefNull. */ + NameBuffer name_buffer; + /** Copy of the GPUMaterial name, to prevent dangling pointers. */ + std::string info_name_; -void GPU_pass_begin_async_compilation(GPUPass *pass, const char *shname); -/** NOTE: Unlike the non-async version, this one returns true when compilation has finalized, - * regardless if it succeeded or not. - * To check for success, see if `GPU_pass_shader_get() != nullptr`. */ -bool GPU_pass_async_compilation_try_finalize(GPUPass *pass); + GPUCodegenCreateInfo(const char *name) : ShaderCreateInfo(name), info_name_(name) + { + /* Base class is always initialized first, so we need to update the name_ pointer here. 
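+     * Otherwise ShaderCreateInfo::name_ would keep pointing at the caller-owned string,
+     * which may be freed before compilation; info_name_ owns a copy, so the reference
+     * stays valid for the lifetime of this create info.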
*/ + name_ = info_name_.c_str(); + }; + ~GPUCodegenCreateInfo() + { + MEM_delete(interface_generated); + } +}; -/* Module */ +class GPUCodegen { + public: + GPUMaterial &mat; + GPUNodeGraph &graph; + GPUCodegenOutput output = {}; + GPUCodegenCreateInfo *create_info = nullptr; -void gpu_codegen_init(); -void gpu_codegen_exit(); + private: + uint32_t hash_ = 0; + BLI_HashMurmur2A hm2a_; + ListBase ubo_inputs_ = {nullptr, nullptr}; + GPUInput *cryptomatte_input_ = nullptr; + + /** Cache parameters for complexity heuristic. */ + uint nodes_total_ = 0; + uint textures_total_ = 0; + uint uniforms_total_ = 0; + + public: + GPUCodegen(GPUMaterial *mat_, GPUNodeGraph *graph_, const char *debug_name); + ~GPUCodegen(); + + void generate_graphs(); + void generate_cryptomatte(); + void generate_uniform_buffer(); + void generate_attribs(); + void generate_resources(); + void generate_library(); + + uint32_t hash_get() const + { + return hash_; + } + + /* Heuristic determined during pass codegen for whether a + * more optimal variant of this material should be compiled. */ + bool should_optimize_heuristic() const; + + private: + void set_unique_ids(); + + void node_serialize(std::stringstream &eval_ss, const GPUNode *node); + std::string graph_serialize(eGPUNodeTag tree_tag, + GPUNodeLink *output_link, + const char *output_default = nullptr); + std::string graph_serialize(eGPUNodeTag tree_tag); +}; + +} // namespace blender::gpu::shader diff --git a/source/blender/gpu/intern/gpu_context.cc b/source/blender/gpu/intern/gpu_context.cc index d2ad40ba269..eb48bfcaf1c 100644 --- a/source/blender/gpu/intern/gpu_context.cc +++ b/source/blender/gpu/intern/gpu_context.cc @@ -27,6 +27,7 @@ #include "GPU_context.hh" #include "GPU_batch.hh" +#include "GPU_pass.hh" #include "gpu_backend.hh" #include "gpu_context_private.hh" #include "gpu_matrix_private.hh" @@ -328,6 +329,8 @@ void GPU_render_step(bool force_resource_release) backend->render_step(force_resource_release); printf_begin(active_ctx); } + + GPU_pass_cache_update(); } /** \} */ diff --git a/source/blender/gpu/intern/gpu_init_exit.cc b/source/blender/gpu/intern/gpu_init_exit.cc index 636389809bb..1b54d290968 100644 --- a/source/blender/gpu/intern/gpu_init_exit.cc +++ b/source/blender/gpu/intern/gpu_init_exit.cc @@ -6,10 +6,12 @@ * \ingroup gpu */ -#include "GPU_init_exit.hh" /* interface */ -#include "GPU_batch.hh" +#include "BKE_material.hh" + +#include "GPU_batch.hh" +#include "GPU_init_exit.hh" /* interface */ +#include "GPU_pass.hh" -#include "intern/gpu_codegen.hh" #include "intern/gpu_private.hh" #include "intern/gpu_shader_create_info_private.hh" #include "intern/gpu_shader_dependency_private.hh" @@ -34,7 +36,7 @@ void GPU_init() gpu_shader_dependency_init(); gpu_shader_create_info_init(); - gpu_codegen_init(); + GPU_pass_cache_init(); gpu_batch_init(); } @@ -43,7 +45,10 @@ void GPU_exit() { gpu_batch_exit(); - gpu_codegen_exit(); + GPU_pass_cache_free(); + + BKE_material_defaults_free_gpu(); + GPU_shader_free_builtin_shaders(); gpu_backend_delete_resources(); diff --git a/source/blender/gpu/intern/gpu_material.cc b/source/blender/gpu/intern/gpu_material.cc index ad182465f8e..69ff26ccab7 100644 --- a/source/blender/gpu/intern/gpu_material.cc +++ b/source/blender/gpu/intern/gpu_material.cc @@ -29,33 +29,24 @@ #include "NOD_shader.h" #include "GPU_material.hh" +#include "GPU_pass.hh" #include "GPU_shader.hh" #include "GPU_texture.hh" #include "GPU_uniform_buffer.hh" #include "DRW_engine.hh" -#include "gpu_codegen.hh" #include "gpu_node_graph.hh" #include 
"atomic_ops.h" +static void gpu_material_ramp_texture_build(GPUMaterial *mat); +static void gpu_material_sky_texture_build(GPUMaterial *mat); + /* Structs */ #define MAX_COLOR_BAND 128 #define MAX_GPU_SKIES 8 -/** - * Whether the optimized variant of the GPUPass should be created asynchronously. - * Usage of this depends on whether there are possible threading challenges of doing so. - * Currently, the overhead of GPU_generate_pass is relatively small in comparison to shader - * compilation, though this option exists in case any potential scenarios for material graph - * optimization cause a slow down on the main thread. - * - * NOTE: The actual shader program for the optimized pass will always be compiled asynchronously, - * this flag controls whether shader node graph source serialization happens on the compilation - * worker thread as well. */ -#define ASYNC_OPTIMIZED_PASS_CREATION 0 - struct GPUColorBandBuilder { float pixels[MAX_COLOR_BAND][CM_TABLE + 1][4]; int current_layer; @@ -68,82 +59,354 @@ struct GPUSkyBuilder { struct GPUMaterial { /* Contains #GPUShader and source code for deferred compilation. - * Can be shared between similar material (i.e: sharing same node-tree topology). */ - GPUPass *pass; + * Can be shared between materials sharing same node-tree topology. */ + GPUPass *pass = nullptr; /* Optimized GPUPass, situationally compiled after initial pass for optimal realtime performance. * This shader variant bakes dynamic uniform data as constant. This variant will not use * the ubo, and instead bake constants directly into the shader source. */ - GPUPass *optimized_pass; - /* Optimization status. - * We also use this status to determine whether this material should be considered for - * optimization. Only sufficiently complex shaders benefit from constant-folding optimizations. - * `GPU_MAT_OPTIMIZATION_READY` -> shader should be optimized and is ready for optimization. - * `GPU_MAT_OPTIMIZATION_SKIP` -> Shader should not be optimized as it would not benefit - * performance to do so, based on the heuristic. - */ - eGPUMaterialOptimizationStatus optimization_status; - double creation_time; -#if ASYNC_OPTIMIZED_PASS_CREATION == 1 - struct DeferredOptimizePass { - GPUCodegenCallbackFn callback; - void *thunk; - } DeferredOptimizePass; - struct DeferredOptimizePass optimize_pass_info; -#endif + GPUPass *optimized_pass = nullptr; - /** UBOs for this material parameters. */ - GPUUniformBuf *ubo; - /** Compilation status. Do not use if shader is not GPU_MAT_SUCCESS. */ - eGPUMaterialStatus status; - /** Some flags about the nodetree & the needed resources. */ - eGPUMaterialFlag flag; - /** The engine type this material is compiled for. */ + /* UBOs for this material parameters. */ + GPUUniformBuf *ubo = nullptr; + /* Some flags about the nodetree & the needed resources. */ + eGPUMaterialFlag flag = GPU_MATFLAG_UPDATED; + /* The engine type this material is compiled for. */ eGPUMaterialEngine engine; /* Identify shader variations (shadow, probe, world background...) */ - uint64_t uuid; + uint64_t uuid = 0; /* Number of generated function. */ - int generated_function_len; - /** Object type for attribute fetching. */ - bool is_volume_shader; + int generated_function_len = 0; - /** DEPRECATED Currently only used for deferred compilation. */ - Scene *scene; - /** Source material, might be null. */ - Material *ma; - /** 1D Texture array containing all color bands. */ - GPUTexture *coba_tex; - /** Builder for coba_tex. 
*/ - GPUColorBandBuilder *coba_builder; - /** 2D Texture array containing all sky textures. */ - GPUTexture *sky_tex; - /** Builder for sky_tex. */ - GPUSkyBuilder *sky_builder; + /* Source material, might be null. */ + Material *source_material = nullptr; + /* 1D Texture array containing all color bands. */ + GPUTexture *coba_tex = nullptr; + /* Builder for coba_tex. */ + GPUColorBandBuilder *coba_builder = nullptr; + /* 2D Texture array containing all sky textures. */ + GPUTexture *sky_tex = nullptr; + /* Builder for sky_tex. */ + GPUSkyBuilder *sky_builder = nullptr; /* Low level node graph(s). Also contains resources needed by the material. */ - GPUNodeGraph graph; + GPUNodeGraph graph = {}; - /** Default material reference used for PSO cache warming. Default materials may perform - * different operations, but the permutation will frequently share the same input PSO - * descriptors. This enables asynchronous PSO compilation as part of the deferred compilation - * pass, reducing runtime stuttering and responsiveness while compiling materials. */ - GPUMaterial *default_mat; + bool has_surface_output = false; + bool has_volume_output = false; + bool has_displacement_output = false; - /** DEPRECATED: To remove. */ - bool has_surface_output; - bool has_volume_output; - bool has_displacement_output; + std::string name; - uint32_t refcount; + GPUMaterial(eGPUMaterialEngine engine) : engine(engine) + { + graph.used_libraries = BLI_gset_new( + BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); + }; - bool do_batch_compilation; + ~GPUMaterial() + { + gpu_node_graph_free(&graph); -#ifndef NDEBUG - char name[64]; -#else - char name[16]; -#endif + if (optimized_pass != nullptr) { + GPU_pass_release(optimized_pass); + } + if (pass != nullptr) { + GPU_pass_release(pass); + } + if (ubo != nullptr) { + GPU_uniformbuf_free(ubo); + } + if (coba_builder != nullptr) { + MEM_freeN(coba_builder); + } + if (coba_tex != nullptr) { + GPU_texture_free(coba_tex); + } + if (sky_tex != nullptr) { + GPU_texture_free(sky_tex); + } + } }; -/* Functions */ +/* Public API */ + +GPUMaterial *GPU_material_from_nodetree(Material *ma, + bNodeTree *ntree, + ListBase *gpumaterials, + const char *name, + eGPUMaterialEngine engine, + uint64_t shader_uuid, + bool deferred_compilation, + GPUCodegenCallbackFn callback, + void *thunk, + GPUMaterialPassReplacementCallbackFn pass_replacement_cb) +{ + /* Search if this material is not already compiled. */ + LISTBASE_FOREACH (LinkData *, link, gpumaterials) { + GPUMaterial *mat = (GPUMaterial *)link->data; + if (mat->uuid == shader_uuid && mat->engine == engine) { + if (!deferred_compilation) { + GPU_pass_ensure_its_ready(mat->pass); + } + return mat; + } + } + + GPUMaterial *mat = MEM_new(__func__, engine); + mat->source_material = ma; + mat->uuid = shader_uuid; + mat->name = name; + + /* Localize tree to create links for reroute and mute. */ + bNodeTree *localtree = blender::bke::node_tree_localize(ntree, nullptr); + ntreeGPUMaterialNodes(localtree, mat); + + gpu_material_ramp_texture_build(mat); + gpu_material_sky_texture_build(mat); + + /* Use default material pass when possible. */ + if (GPUPass *default_pass = pass_replacement_cb ? pass_replacement_cb(thunk, mat) : nullptr) { + mat->pass = default_pass; + GPU_pass_acquire(mat->pass); + /** WORKAROUND: + * The node tree code is never executed in default replaced passes, + * but the GPU validation will still complain if the node tree UBO is not bound. 
+ * So we create a dummy UBO with (at least) the size of the default material one (192 bytes). + * We allocate 256 bytes to leave some room for future changes. */ + mat->ubo = GPU_uniformbuf_create_ex(256, nullptr, "Dummy UBO"); + } + else { + /* Create source code and search pass cache for an already compiled version. */ + mat->pass = GPU_generate_pass( + mat, &mat->graph, mat->name.c_str(), engine, deferred_compilation, callback, thunk, false); + } + + /* Determine whether we should generate an optimized variant of the graph. + * Heuristic is based on complexity of default material pass and shader node graph. */ + if (GPU_pass_should_optimize(mat->pass)) { + mat->optimized_pass = GPU_generate_pass( + mat, &mat->graph, mat->name.c_str(), engine, true, callback, thunk, true); + } + + gpu_node_graph_free_nodes(&mat->graph); + /* Only free after GPU_pass_shader_get where GPUUniformBuf read data from the local tree. */ + blender::bke::node_tree_free_local_tree(localtree); + BLI_assert(!localtree->id.py_instance); /* Or call #BKE_libblock_free_data_py. */ + MEM_freeN(localtree); + + /* Note that even if building the shader fails in some way, we want to keep + * it to avoid trying to compile again and again, and simply do not use + * the actual shader on drawing. */ + LinkData *link = MEM_callocN("GPUMaterialLink"); + link->data = mat; + BLI_addtail(gpumaterials, link); + + return mat; +} + +GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine, + ConstructGPUMaterialFn construct_function_cb, + GPUCodegenCallbackFn generate_code_function_cb, + void *thunk) +{ + /* Allocate a new material and its material graph. */ + GPUMaterial *material = MEM_new(__func__, engine); + + /* Construct the material graph by adding and linking the necessary GPU material nodes. */ + construct_function_cb(thunk, material); + + /* Create and initialize the texture storing color bands used by Ramp and Curve nodes. */ + gpu_material_ramp_texture_build(material); + + /* Lookup an existing pass in the cache or generate a new one. */ + material->pass = GPU_generate_pass(material, + &material->graph, + __func__, + engine, + false, + generate_code_function_cb, + thunk, + false); + + /* Determine whether we should generate an optimized variant of the graph. + * Heuristic is based on complexity of default material pass and shader node graph. 
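+   * The optimized variant bakes the current uniform values into the shader source, so it
+   * is unique to this material and is never shared through the pass cache.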
*/ + if (GPU_pass_should_optimize(material->pass)) { + material->optimized_pass = GPU_generate_pass(material, + &material->graph, + __func__, + engine, + true, + generate_code_function_cb, + thunk, + true); + } + + gpu_node_graph_free_nodes(&material->graph); + + return material; +} + +void GPU_material_free_single(GPUMaterial *material) +{ + MEM_delete(material); +} + +void GPU_material_free(ListBase *gpumaterial) +{ + LISTBASE_FOREACH (LinkData *, link, gpumaterial) { + GPUMaterial *material = static_cast(link->data); + GPU_material_free_single(material); + } + BLI_freelistN(gpumaterial); +} + +void GPU_materials_free(Main *bmain) +{ + LISTBASE_FOREACH (Material *, ma, &bmain->materials) { + GPU_material_free(&ma->gpumaterial); + } + + LISTBASE_FOREACH (World *, wo, &bmain->worlds) { + GPU_material_free(&wo->gpumaterial); + } + + BKE_material_defaults_free_gpu(); +} + +const char *GPU_material_get_name(GPUMaterial *material) +{ + return material->name.c_str(); +} + +uint64_t GPU_material_uuid_get(GPUMaterial *mat) +{ + return mat->uuid; +} + +Material *GPU_material_get_material(GPUMaterial *material) +{ + return material->source_material; +} + +GPUPass *GPU_material_get_pass(GPUMaterial *material) +{ + /* If an optimized pass variant is available, and optimization is + * flagged as complete, we use this one instead. */ + return GPU_material_optimization_status(material) == GPU_MAT_OPTIMIZATION_SUCCESS ? + material->optimized_pass : + material->pass; +} + +GPUShader *GPU_material_get_shader(GPUMaterial *material) +{ + return GPU_pass_shader_get(GPU_material_get_pass(material)); +} + +eGPUMaterialStatus GPU_material_status(GPUMaterial *mat) +{ + switch (GPU_pass_status(mat->pass)) { + case GPU_PASS_SUCCESS: + return GPU_MAT_SUCCESS; + case GPU_PASS_QUEUED: + return GPU_MAT_QUEUED; + default: + return GPU_MAT_FAILED; + } +} + +eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat) +{ + if (!GPU_pass_should_optimize(mat->pass)) { + return GPU_MAT_OPTIMIZATION_SKIP; + } + + switch (GPU_pass_status(mat->optimized_pass)) { + case GPU_PASS_SUCCESS: + return GPU_MAT_OPTIMIZATION_SUCCESS; + case GPU_PASS_QUEUED: + return GPU_MAT_OPTIMIZATION_QUEUED; + default: + BLI_assert_unreachable(); + return GPU_MAT_OPTIMIZATION_SKIP; + } +} + +uint64_t GPU_material_compilation_timestamp(GPUMaterial *mat) +{ + return GPU_pass_compilation_timestamp(mat->pass); +} + +bool GPU_material_has_surface_output(GPUMaterial *mat) +{ + return mat->has_surface_output; +} + +bool GPU_material_has_volume_output(GPUMaterial *mat) +{ + return mat->has_volume_output; +} + +bool GPU_material_has_displacement_output(GPUMaterial *mat) +{ + return mat->has_displacement_output; +} + +bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag) +{ + return (mat->flag & flag) != 0; +} + +eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat) +{ + return mat->flag; +} + +void GPU_material_flag_set(GPUMaterial *mat, eGPUMaterialFlag flag) +{ + if ((flag & GPU_MATFLAG_GLOSSY) && (mat->flag & GPU_MATFLAG_GLOSSY)) { + /* Tag material using multiple glossy BSDF as using clear coat. 
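+     * (Reaching this point with GPU_MATFLAG_GLOSSY already set means the node tree
+     * contains more than one glossy closure.)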
*/ + mat->flag |= GPU_MATFLAG_COAT; + } + mat->flag |= flag; +} + +void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs) +{ + material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name.c_str()); +} + +GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material) +{ + return material->ubo; +} + +ListBase GPU_material_attributes(const GPUMaterial *material) +{ + return material->graph.attributes; +} + +ListBase GPU_material_textures(GPUMaterial *material) +{ + return material->graph.textures; +} + +const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material) +{ + const GPUUniformAttrList *attrs = &material->graph.uniform_attrs; + return attrs->count > 0 ? attrs : nullptr; +} + +const ListBase *GPU_material_layer_attributes(const GPUMaterial *material) +{ + const ListBase *attrs = &material->graph.layer_attrs; + return !BLI_listbase_is_empty(attrs) ? attrs : nullptr; +} + +GPUNodeGraph *gpu_material_node_graph(GPUMaterial *material) +{ + return &material->graph; +} + +/* Resources */ GPUTexture **gpu_material_sky_texture_layer_set( GPUMaterial *mat, int width, int height, const float *pixels, float *row) @@ -243,119 +506,7 @@ static void gpu_material_sky_texture_build(GPUMaterial *mat) mat->sky_builder = nullptr; } -void GPU_material_free_single(GPUMaterial *material) -{ - bool do_free = atomic_sub_and_fetch_uint32(&material->refcount, 1) == 0; - if (!do_free) { - return; - } - - gpu_node_graph_free(&material->graph); - - if (material->optimized_pass != nullptr) { - GPU_pass_release(material->optimized_pass); - } - if (material->pass != nullptr) { - GPU_pass_release(material->pass); - } - if (material->ubo != nullptr) { - GPU_uniformbuf_free(material->ubo); - } - if (material->coba_builder != nullptr) { - MEM_freeN(material->coba_builder); - } - if (material->coba_tex != nullptr) { - GPU_texture_free(material->coba_tex); - } - if (material->sky_tex != nullptr) { - GPU_texture_free(material->sky_tex); - } - MEM_freeN(material); -} - -void GPU_material_free(ListBase *gpumaterial) -{ - LISTBASE_FOREACH (LinkData *, link, gpumaterial) { - GPUMaterial *material = static_cast(link->data); - DRW_deferred_shader_remove(material); - GPU_material_free_single(material); - } - BLI_freelistN(gpumaterial); -} - -Scene *GPU_material_scene(GPUMaterial *material) -{ - return material->scene; -} - -GPUPass *GPU_material_get_pass(GPUMaterial *material) -{ - /* If an optimized pass variant is available, and optimization is - * flagged as complete, we use this one instead. */ - return ((GPU_material_optimization_status(material) == GPU_MAT_OPTIMIZATION_SUCCESS) && - material->optimized_pass) ? - material->optimized_pass : - material->pass; -} - -GPUShader *GPU_material_get_shader(GPUMaterial *material) -{ - /* If an optimized material shader variant is available, and optimization is - * flagged as complete, we use this one instead. */ - GPUShader *shader = ((GPU_material_optimization_status(material) == - GPU_MAT_OPTIMIZATION_SUCCESS) && - material->optimized_pass) ? - GPU_pass_shader_get(material->optimized_pass) : - nullptr; - return (shader) ? shader : ((material->pass) ? GPU_pass_shader_get(material->pass) : nullptr); -} - -GPUShader *GPU_material_get_shader_base(GPUMaterial *material) -{ - return (material->pass) ? 
GPU_pass_shader_get(material->pass) : nullptr; -} - -const char *GPU_material_get_name(GPUMaterial *material) -{ - return material->name; -} - -Material *GPU_material_get_material(GPUMaterial *material) -{ - return material->ma; -} - -GPUUniformBuf *GPU_material_uniform_buffer_get(GPUMaterial *material) -{ - return material->ubo; -} - -void GPU_material_uniform_buffer_create(GPUMaterial *material, ListBase *inputs) -{ - material->ubo = GPU_uniformbuf_create_from_list(inputs, material->name); -} - -ListBase GPU_material_attributes(const GPUMaterial *material) -{ - return material->graph.attributes; -} - -ListBase GPU_material_textures(GPUMaterial *material) -{ - return material->graph.textures; -} - -const GPUUniformAttrList *GPU_material_uniform_attributes(const GPUMaterial *material) -{ - const GPUUniformAttrList *attrs = &material->graph.uniform_attrs; - return attrs->count > 0 ? attrs : nullptr; -} - -const ListBase *GPU_material_layer_attributes(const GPUMaterial *material) -{ - const ListBase *attrs = &material->graph.layer_attrs; - return !BLI_listbase_is_empty(attrs) ? attrs : nullptr; -} +/* Code generation */ void GPU_material_output_surface(GPUMaterial *material, GPUNodeLink *link) { @@ -430,476 +581,3 @@ char *GPU_material_split_sub_function(GPUMaterial *material, return func_link->name; } - -GPUNodeGraph *gpu_material_node_graph(GPUMaterial *material) -{ - return &material->graph; -} - -eGPUMaterialStatus GPU_material_status(GPUMaterial *mat) -{ - return mat->status; -} - -void GPU_material_status_set(GPUMaterial *mat, eGPUMaterialStatus status) -{ - mat->status = status; -} - -eGPUMaterialOptimizationStatus GPU_material_optimization_status(GPUMaterial *mat) -{ - return mat->optimization_status; -} - -void GPU_material_optimization_status_set(GPUMaterial *mat, eGPUMaterialOptimizationStatus status) -{ - mat->optimization_status = status; - if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) { - /* Reset creation timer to delay optimization pass. */ - mat->creation_time = BLI_time_now_seconds(); - } -} - -bool GPU_material_optimization_ready(GPUMaterial *mat) -{ - /* Timer threshold before optimizations will be queued. - * When materials are frequently being modified, optimization - * can incur CPU overhead from excessive compilation. - * - * As the optimization is entirely asynchronous, it is still beneficial - * to do this quickly to avoid build-up and improve runtime performance. - * The threshold just prevents compilations being queued frame after frame. */ - const double optimization_time_threshold_s = 1.2; - return ((BLI_time_now_seconds() - mat->creation_time) >= optimization_time_threshold_s); -} - -void GPU_material_set_default(GPUMaterial *material, GPUMaterial *default_material) -{ - if (material != default_material) { - material->default_mat = default_material; - } -} - -/* Code generation */ - -bool GPU_material_has_surface_output(GPUMaterial *mat) -{ - return mat->has_surface_output; -} - -bool GPU_material_has_volume_output(GPUMaterial *mat) -{ - return mat->has_volume_output; -} - -bool GPU_material_has_displacement_output(GPUMaterial *mat) -{ - return mat->has_displacement_output; -} - -void GPU_material_flag_set(GPUMaterial *mat, eGPUMaterialFlag flag) -{ - if ((flag & GPU_MATFLAG_GLOSSY) && (mat->flag & GPU_MATFLAG_GLOSSY)) { - /* Tag material using multiple glossy BSDF as using clear coat. 
*/ - mat->flag |= GPU_MATFLAG_COAT; - } - mat->flag |= flag; -} - -bool GPU_material_flag_get(const GPUMaterial *mat, eGPUMaterialFlag flag) -{ - return (mat->flag & flag) != 0; -} - -eGPUMaterialFlag GPU_material_flag(const GPUMaterial *mat) -{ - return mat->flag; -} - -bool GPU_material_recalc_flag_get(GPUMaterial *mat) -{ - /* NOTE: Consumes the flags. */ - - bool updated = (mat->flag & GPU_MATFLAG_UPDATED) != 0; - mat->flag &= ~GPU_MATFLAG_UPDATED; - return updated; -} - -uint64_t GPU_material_uuid_get(GPUMaterial *mat) -{ - return mat->uuid; -} - -GPUMaterial *GPU_material_from_nodetree(Scene *scene, - Material *ma, - bNodeTree *ntree, - ListBase *gpumaterials, - const char *name, - eGPUMaterialEngine engine, - uint64_t shader_uuid, - bool is_volume_shader, - bool is_lookdev, - GPUCodegenCallbackFn callback, - void *thunk, - GPUMaterialPassReplacementCallbackFn pass_replacement_cb) -{ - /* Search if this material is not already compiled. */ - LISTBASE_FOREACH (LinkData *, link, gpumaterials) { - GPUMaterial *mat = (GPUMaterial *)link->data; - if (mat->uuid == shader_uuid && mat->engine == engine) { - return mat; - } - } - - GPUMaterial *mat = MEM_callocN("GPUMaterial"); - mat->ma = ma; - mat->scene = scene; - mat->engine = engine; - mat->uuid = shader_uuid; - mat->flag = GPU_MATFLAG_UPDATED; - mat->status = GPU_MAT_CREATED; - mat->default_mat = nullptr; - mat->is_volume_shader = is_volume_shader; - mat->graph.used_libraries = BLI_gset_new( - BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); - mat->refcount = 1; - STRNCPY(mat->name, name); - if (is_lookdev) { - mat->flag |= GPU_MATFLAG_LOOKDEV_HACK; - } - - /* Localize tree to create links for reroute and mute. */ - bNodeTree *localtree = blender::bke::node_tree_localize(ntree, nullptr); - ntreeGPUMaterialNodes(localtree, mat); - - gpu_material_ramp_texture_build(mat); - gpu_material_sky_texture_build(mat); - - /* Use default material pass when possible. */ - if (GPUPass *default_pass = pass_replacement_cb ? pass_replacement_cb(thunk, mat) : nullptr) { - mat->pass = default_pass; - GPU_pass_acquire(mat->pass); - /** WORKAROUND: - * The node tree code is never executed in default replaced passes, - * but the GPU validation will still complain if the node tree UBO is not bound. - * So we create a dummy UBO with (at least) the size of the default material one (192 bytes). - * We allocate 256 bytes to leave some room for future changes. */ - mat->ubo = GPU_uniformbuf_create_ex(256, nullptr, "Dummy UBO"); - } - else { - /* Create source code and search pass cache for an already compiled version. */ - mat->pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, false); - } - - if (mat->pass == nullptr) { - /* We had a cache hit and the shader has already failed to compile. */ - mat->status = GPU_MAT_FAILED; - gpu_node_graph_free(&mat->graph); - } - else { - /* Determine whether we should generate an optimized variant of the graph. - * Heuristic is based on complexity of default material pass and shader node graph. */ - if (GPU_pass_should_optimize(mat->pass)) { - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); - } - - GPUShader *sh = GPU_pass_shader_get(mat->pass); - if (sh != nullptr) { - /* We had a cache hit and the shader is already compiled. */ - mat->status = GPU_MAT_SUCCESS; - - if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { - gpu_node_graph_free_nodes(&mat->graph); - } - } - - /* Generate optimized pass. 
*/ - if (mat->optimization_status == GPU_MAT_OPTIMIZATION_READY) { -#if ASYNC_OPTIMIZED_PASS_CREATION == 1 - mat->optimized_pass = nullptr; - mat->optimize_pass_info.callback = callback; - mat->optimize_pass_info.thunk = thunk; -#else - mat->optimized_pass = GPU_generate_pass(mat, &mat->graph, engine, callback, thunk, true); - if (mat->optimized_pass == nullptr) { - /* Failed to create optimized pass. */ - gpu_node_graph_free_nodes(&mat->graph); - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); - } - else { - GPUShader *optimized_sh = GPU_pass_shader_get(mat->optimized_pass); - if (optimized_sh != nullptr) { - /* Optimized shader already available. */ - gpu_node_graph_free_nodes(&mat->graph); - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS); - } - } -#endif - } - } - - /* Only free after GPU_pass_shader_get where GPUUniformBuf read data from the local tree. */ - blender::bke::node_tree_free_local_tree(localtree); - BLI_assert(!localtree->id.py_instance); /* Or call #BKE_libblock_free_data_py. */ - MEM_freeN(localtree); - - /* Note that even if building the shader fails in some way, we still keep - * it to avoid trying to compile again and again, and simply do not use - * the actual shader on drawing. */ - LinkData *link = MEM_callocN("GPUMaterialLink"); - link->data = mat; - BLI_addtail(gpumaterials, link); - - return mat; -} - -void GPU_material_acquire(GPUMaterial *mat) -{ - atomic_add_and_fetch_uint32(&mat->refcount, 1); -} - -void GPU_material_release(GPUMaterial *mat) -{ - GPU_material_free_single(mat); -} - -static void gpu_material_finalize(GPUMaterial *mat, bool success) -{ - mat->flag |= GPU_MATFLAG_UPDATED; - - if (success) { - GPUShader *sh = GPU_pass_shader_get(mat->pass); - if (sh != nullptr) { - - /** Perform asynchronous Render Pipeline State Object (PSO) compilation. - * - * Warm PSO cache within asynchronous compilation thread using default material as source. - * GPU_shader_warm_cache(..) performs the API-specific PSO compilation using the assigned - * parent shader's cached PSO descriptors as an input. - * - * This is only applied if the given material has a specified default reference - * material available, and the default material is already compiled. - * - * As PSOs do not always match for default shaders, we limit warming for PSO - * configurations to ensure compile time remains fast, as these first - * entries will be the most commonly used PSOs. As not all PSOs are necessarily - * required immediately, this limit should remain low (1-3 at most). */ - if (!ELEM(mat->default_mat, nullptr, mat)) { - if (mat->default_mat->pass != nullptr) { - GPUShader *parent_sh = GPU_pass_shader_get(mat->default_mat->pass); - if (parent_sh) { - /* Skip warming if cached pass is identical to the default material. */ - if (mat->default_mat->pass != mat->pass && parent_sh != sh) { - GPU_shader_set_parent(sh, parent_sh); - GPU_shader_warm_cache(sh, 1); - } - } - } - } - - /* Flag success. */ - mat->status = GPU_MAT_SUCCESS; - if (mat->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { - /* Only free node graph nodes if not required by secondary optimization pass. 
*/ - gpu_node_graph_free_nodes(&mat->graph); - } - } - else { - mat->status = GPU_MAT_FAILED; - } - } - else { - mat->status = GPU_MAT_FAILED; - GPU_pass_release(mat->pass); - mat->pass = nullptr; - gpu_node_graph_free(&mat->graph); - } -} - -void GPU_material_compile(GPUMaterial *mat) -{ - bool success; - BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED)); - BLI_assert(mat->pass); - -/* NOTE: The shader may have already been compiled here since we are - * sharing GPUShader across GPUMaterials. In this case it's a no-op. */ -#ifndef NDEBUG - success = GPU_pass_compile(mat->pass, mat->name); -#else - success = GPU_pass_compile(mat->pass, __func__); -#endif - - gpu_material_finalize(mat, success); -} - -void GPU_material_async_compile(GPUMaterial *mat) -{ - BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED)); - BLI_assert(mat->pass); -#ifndef NDEBUG - const char *name = mat->name; -#else - const char *name = __func__; -#endif - GPU_pass_begin_async_compilation(mat->pass, name); -} - -bool GPU_material_async_try_finalize(GPUMaterial *mat) -{ - BLI_assert(ELEM(mat->status, GPU_MAT_QUEUED, GPU_MAT_CREATED)); - if (GPU_pass_async_compilation_try_finalize(mat->pass)) { - gpu_material_finalize(mat, GPU_pass_shader_get(mat->pass) != nullptr); - return true; - } - return false; -} - -void GPU_material_optimize(GPUMaterial *mat) -{ - /* If shader is flagged for skipping optimization or has already been successfully - * optimized, skip. */ - if (ELEM(mat->optimization_status, GPU_MAT_OPTIMIZATION_SKIP, GPU_MAT_OPTIMIZATION_SUCCESS)) { - return; - } - - /* If original shader has not been fully compiled, we are not - * ready to perform optimization. */ - if (mat->status != GPU_MAT_SUCCESS) { - /* Reset optimization status. */ - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_READY); - return; - } - -#if ASYNC_OPTIMIZED_PASS_CREATION == 1 - /* If the optimized pass is not valid, first generate optimized pass. - * NOTE(Threading): Need to verify if GPU_generate_pass can cause side-effects, especially when - * used with "thunk". So far, this appears to work, and deferring optimized pass creation is more - * optimal, as these do not benefit from caching, due to baked constants. However, this could - * possibly be cause for concern for certain cases. */ - if (!mat->optimized_pass) { - mat->optimized_pass = GPU_generate_pass(mat, - &mat->graph, - mat->engine, - mat->optimize_pass_info.callback, - mat->optimize_pass_info.thunk, - true); - BLI_assert(mat->optimized_pass); - } -#else - if (!mat->optimized_pass) { - /* Optimized pass has not been created, skip future optimization attempts. */ - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); - return; - } -#endif - - bool success; -/* NOTE: The shader may have already been compiled here since we are - * sharing GPUShader across GPUMaterials. In this case it's a no-op. */ -#ifndef NDEBUG - success = GPU_pass_compile(mat->optimized_pass, mat->name); -#else - success = GPU_pass_compile(mat->optimized_pass, __func__); -#endif - - if (success) { - GPUShader *sh = GPU_pass_shader_get(mat->optimized_pass); - if (sh != nullptr) { - /** Perform asynchronous Render Pipeline State Object (PSO) compilation. - * - * Warm PSO cache within asynchronous compilation thread for optimized materials. - * This setup assigns the original unoptimized shader as a "parent" shader - * for the optimized version. 
This then allows the associated GPU backend to - * compile PSOs within this asynchronous pass, using the identical PSO descriptors of the - * parent shader. - * - * This eliminates all run-time stuttering associated with material optimization and ensures - * realtime material editing and animation remains seamless, while retaining optimal realtime - * performance. */ - GPUShader *parent_sh = GPU_pass_shader_get(mat->pass); - if (parent_sh) { - GPU_shader_set_parent(sh, parent_sh); - GPU_shader_warm_cache(sh, -1); - } - - /* Mark as complete. */ - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SUCCESS); - } - else { - /* Optimized pass failed to compile. Disable any future optimization attempts. */ - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); - } - } - else { - /* Optimization pass generation failed. Disable future attempts to optimize. */ - GPU_pass_release(mat->optimized_pass); - mat->optimized_pass = nullptr; - GPU_material_optimization_status_set(mat, GPU_MAT_OPTIMIZATION_SKIP); - } - - /* Release node graph as no longer needed. */ - gpu_node_graph_free_nodes(&mat->graph); -} - -void GPU_materials_free(Main *bmain) -{ - LISTBASE_FOREACH (Material *, ma, &bmain->materials) { - GPU_material_free(&ma->gpumaterial); - } - - LISTBASE_FOREACH (World *, wo, &bmain->worlds) { - GPU_material_free(&wo->gpumaterial); - } - - BKE_material_defaults_free_gpu(); -} - -GPUMaterial *GPU_material_from_callbacks(eGPUMaterialEngine engine, - ConstructGPUMaterialFn construct_function_cb, - GPUCodegenCallbackFn generate_code_function_cb, - void *thunk) -{ - /* Allocate a new material and its material graph, and initialize its reference count. */ - GPUMaterial *material = MEM_callocN("GPUMaterial"); - material->graph.used_libraries = BLI_gset_new( - BLI_ghashutil_ptrhash, BLI_ghashutil_ptrcmp, "GPUNodeGraph.used_libraries"); - material->refcount = 1; - material->optimization_status = GPU_MAT_OPTIMIZATION_SKIP; - material->optimized_pass = nullptr; - material->default_mat = nullptr; - material->engine = engine; - - /* Construct the material graph by adding and linking the necessary GPU material nodes. */ - construct_function_cb(thunk, material); - - /* Create and initialize the texture storing color bands used by Ramp and Curve nodes. */ - gpu_material_ramp_texture_build(material); - - /* Lookup an existing pass in the cache or generate a new one. */ - material->pass = GPU_generate_pass( - material, &material->graph, material->engine, generate_code_function_cb, thunk, false); - material->optimized_pass = nullptr; - - /* The pass already exists in the pass cache but its shader already failed to compile. */ - if (material->pass == nullptr) { - material->status = GPU_MAT_FAILED; - gpu_node_graph_free(&material->graph); - return material; - } - - /* The pass already exists in the pass cache and its shader is already compiled. */ - GPUShader *shader = GPU_pass_shader_get(material->pass); - if (shader != nullptr) { - material->status = GPU_MAT_SUCCESS; - if (material->optimization_status == GPU_MAT_OPTIMIZATION_SKIP) { - /* Only free node graph if not required by secondary optimization pass. */ - gpu_node_graph_free_nodes(&material->graph); - } - return material; - } - - /* The material was created successfully but still needs to be compiled. 
*/ - material->status = GPU_MAT_CREATED; - return material; -} diff --git a/source/blender/gpu/intern/gpu_pass.cc b/source/blender/gpu/intern/gpu_pass.cc new file mode 100644 index 00000000000..d8d97bed752 --- /dev/null +++ b/source/blender/gpu/intern/gpu_pass.cc @@ -0,0 +1,417 @@ +/* SPDX-FileCopyrightText: 2025 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +/** \file + * \ingroup gpu + * + * Convert material node-trees to GLSL. + */ + +#include "MEM_guardedalloc.h" + +#include "BLI_map.hh" +#include "BLI_span.hh" +#include "BLI_time.h" +#include "BLI_vector.hh" + +#include "GPU_capabilities.hh" +#include "GPU_context.hh" +#include "GPU_pass.hh" +#include "GPU_vertex_format.hh" +#include "gpu_codegen.hh" + +#include +#include + +using namespace blender; +using namespace blender::gpu::shader; + +static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info); + +/* -------------------------------------------------------------------- */ +/** \name GPUPass + * \{ */ + +struct GPUPass { + static inline std::atomic compilation_counts = 0; + + GPUCodegenCreateInfo *create_info = nullptr; + BatchHandle compilation_handle = 0; + std::atomic shader = nullptr; + std::atomic status = GPU_PASS_QUEUED; + /* Orphaned GPUPasses gets freed by the garbage collector. */ + std::atomic refcount = 1; + /* The last time the refcount was greater than 0. */ + double gc_timestamp = 0.0f; + + uint64_t compilation_timestamp = 0; + + /** Hint that an optimized variant of this pass should be created. + * Based on a complexity heuristic from pass code generation. */ + bool should_optimize = false; + bool is_optimization_pass = false; + + GPUPass(GPUCodegenCreateInfo *info, + bool deferred_compilation, + bool is_optimization_pass, + bool should_optimize) + : create_info(info), + should_optimize(should_optimize), + is_optimization_pass(is_optimization_pass) + { + BLI_assert(!is_optimization_pass || !should_optimize); + if (is_optimization_pass && deferred_compilation) { + // Defer until all non optimization passes are compiled. + return; + } + + GPUShaderCreateInfo *base_info = reinterpret_cast(create_info); + + if (deferred_compilation) { + compilation_handle = GPU_shader_batch_create_from_infos( + Span(&base_info, 1)); + } + else { + shader = GPU_shader_create_from_info(base_info); + finalize_compilation(); + } + } + + ~GPUPass() + { + if (compilation_handle) { + GPU_shader_batch_cancel(compilation_handle); + } + else { + BLI_assert(create_info == nullptr || (is_optimization_pass && status == GPU_PASS_QUEUED)); + } + MEM_delete(create_info); + GPU_SHADER_FREE_SAFE(shader); + } + + void finalize_compilation() + { + BLI_assert_msg(create_info, "GPUPass::finalize_compilation() called more than once."); + + if (compilation_handle) { + shader = GPU_shader_batch_finalize(compilation_handle).first(); + } + + compilation_timestamp = ++compilation_counts; + + if (!shader && !gpu_pass_validate(create_info)) { + fprintf(stderr, "GPUShader: error: too many samplers in shader.\n"); + } + + status = shader ? 
+    MEM_delete(create_info);
+    create_info = nullptr;
+  }
+
+  void update(double timestamp)
+  {
+    update_compilation();
+    update_gc_timestamp(timestamp);
+  }
+
+  void update_compilation()
+  {
+    if (compilation_handle) {
+      if (GPU_shader_batch_is_ready(compilation_handle)) {
+        finalize_compilation();
+      }
+    }
+    else if (status == GPU_PASS_QUEUED && refcount > 0) {
+      BLI_assert(is_optimization_pass);
+      GPUShaderCreateInfo *base_info = reinterpret_cast<GPUShaderCreateInfo *>(create_info);
+      compilation_handle = GPU_shader_batch_create_from_infos(Span(&base_info, 1));
+    }
+  }
+
+  void update_gc_timestamp(double timestamp)
+  {
+    if (refcount != 0 || gc_timestamp == 0.0f) {
+      gc_timestamp = timestamp;
+    }
+  }
+
+  bool should_gc(int gc_collect_rate, double timestamp)
+  {
+    BLI_assert(gc_timestamp != 0.0f);
+    return !compilation_handle && status != GPU_PASS_FAILED &&
+           (timestamp - gc_timestamp) >= gc_collect_rate;
+  }
+};
+
+eGPUPassStatus GPU_pass_status(GPUPass *pass)
+{
+  return pass->status;
+}
+
+bool GPU_pass_should_optimize(GPUPass *pass)
+{
+  /* Returns the optimization heuristic prepared during initial codegen.
+   * NOTE: Only enabled on Metal, since it doesn't seem to yield any performance improvements for
+   * other backends. */
+  return (GPU_backend_get_type() == GPU_BACKEND_METAL) && pass->should_optimize;
+
+#if 0
+  /* Returns the optimization heuristic prepared during initial codegen.
+   * NOTE: Optimization limited to parallel compilation as it causes CPU stalls otherwise. */
+  return pass->should_optimize && GPU_use_parallel_compilation();
+#endif
+}
+
+GPUShader *GPU_pass_shader_get(GPUPass *pass)
+{
+  return pass->shader;
+}
+
+void GPU_pass_acquire(GPUPass *pass)
+{
+  int previous_refcount = pass->refcount++;
+  UNUSED_VARS_NDEBUG(previous_refcount);
+  BLI_assert(previous_refcount > 0);
+}
+
+void GPU_pass_release(GPUPass *pass)
+{
+  int previous_refcount = pass->refcount--;
+  UNUSED_VARS_NDEBUG(previous_refcount);
+  BLI_assert(previous_refcount > 0);
+}
+
+uint64_t GPU_pass_global_compilation_count()
+{
+  return GPUPass::compilation_counts;
+}
+
+uint64_t GPU_pass_compilation_timestamp(GPUPass *pass)
+{
+  return pass->compilation_timestamp;
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name GPUPass Cache
+ *
+ * Internal shader cache: this prevents shader recompilation / stalls when
+ * using undo/redo AND also allows GPUPass reuse if the shader code is the
+ * same for 2 different materials. Unused GPUPasses are freed by garbage collection.
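+ * Passes are keyed by their code-generation hash and stored per engine, with optimization
+ * passes tracked separately from the base passes.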
+ * \{ */
+
+class GPUPassCache {
+
+  /* Number of seconds with 0 users required before garbage collecting a pass. */
+  static constexpr float gc_collect_rate_ = 60.0f;
+  /* Number of seconds without base compilations required before starting to compile optimization
+   * passes. */
+  static constexpr float optimization_delay_ = 10.0f;
+
+  double last_base_compilation_timestamp_ = -1.0;
+
+  Map<uint64_t, std::unique_ptr<GPUPass>> passes_[GPU_MAT_ENGINE_MAX][2 /*is_optimization_pass*/];
+  std::mutex mutex_;
+
+ public:
+  void add(eGPUMaterialEngine engine,
+           GPUCodegen &codegen,
+           bool deferred_compilation,
+           bool is_optimization_pass)
+  {
+    std::lock_guard lock(mutex_);
+
+    passes_[engine][is_optimization_pass].add(
+        codegen.hash_get(),
+        std::make_unique<GPUPass>(codegen.create_info,
+                                  deferred_compilation,
+                                  is_optimization_pass,
+                                  codegen.should_optimize_heuristic()));
+  };
+
+  GPUPass *get(eGPUMaterialEngine engine,
+               size_t hash,
+               bool allow_deferred,
+               bool is_optimization_pass)
+  {
+    std::lock_guard lock(mutex_);
+    std::unique_ptr<GPUPass> *pass = passes_[engine][is_optimization_pass].lookup_ptr(hash);
+    if (!allow_deferred && pass && pass->get()->status == GPU_PASS_QUEUED) {
+      pass->get()->finalize_compilation();
+    }
+    return pass ? pass->get() : nullptr;
+  }
+
+  void update()
+  {
+    std::lock_guard lock(mutex_);
+
+    double timestamp = BLI_time_now_seconds();
+
+    bool base_passes_ready = true;
+
+    /* Base Passes. */
+    for (auto &engine_passes : passes_) {
+      for (std::unique_ptr<GPUPass> &pass : engine_passes[false].values()) {
+        pass->update(timestamp);
+        if (pass->status == GPU_PASS_QUEUED) {
+          base_passes_ready = false;
+        }
+      }
+
+      engine_passes[false].remove_if(
+          [&](auto item) { return item.value->should_gc(gc_collect_rate_, timestamp); });
+    }
+
+    /* Optimization Passes GC. */
+    for (auto &engine_passes : passes_) {
+      for (std::unique_ptr<GPUPass> &pass : engine_passes[true].values()) {
+        pass->update_gc_timestamp(timestamp);
+      }
+
+      engine_passes[true].remove_if(
+          /* TODO: Use lower rate for optimization passes? */
+          [&](auto item) { return item.value->should_gc(gc_collect_rate_, timestamp); });
+    }
+
+    if (!base_passes_ready) {
+      last_base_compilation_timestamp_ = timestamp;
+      return;
+    }
+
+    if ((timestamp - last_base_compilation_timestamp_) < optimization_delay_) {
+      return;
+    }
+
+    /* Optimization Passes Compilation. */
+    for (auto &engine_passes : passes_) {
+      for (std::unique_ptr<GPUPass> &pass : engine_passes[true].values()) {
+        pass->update_compilation();
+      }
+    }
+  }
+
+  std::mutex &get_mutex()
+  {
+    return mutex_;
+  }
+};
+
+static GPUPassCache *g_cache = nullptr;
+
+void GPU_pass_ensure_its_ready(GPUPass *pass)
+{
+  if (pass->status == GPU_PASS_QUEUED) {
+    std::lock_guard lock(g_cache->get_mutex());
+    if (pass->status == GPU_PASS_QUEUED) {
+      pass->finalize_compilation();
+    }
+  }
+}
+
+void GPU_pass_cache_init()
+{
+  g_cache = MEM_new<GPUPassCache>(__func__);
+}
+
+void GPU_pass_cache_update()
+{
+  g_cache->update();
+}
+
+void GPU_pass_cache_wait_for_all()
+{
+  GPU_shader_batch_wait_for_all();
+  g_cache->update();
+}
+
+void GPU_pass_cache_free()
+{
+  MEM_SAFE_DELETE(g_cache);
+}
+
+/** \} */
+
+/* -------------------------------------------------------------------- */
+/** \name Compilation
+ * \{ */
+
+static bool gpu_pass_validate(GPUCodegenCreateInfo *create_info)
+{
+  int samplers_len = 0;
+  for (const ShaderCreateInfo::Resource &res : create_info->resources_get_all_()) {
+    if (res.bind_type == ShaderCreateInfo::Resource::BindType::SAMPLER) {
+      samplers_len++;
+    }
+  }
+
+  /* Validate against GPU limit. */
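+  /* NOTE: the fragment and vertex stage limits are checked individually, and the combined
+   * sampler count must also fit within half of the total texture limit. */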
+  if ((samplers_len > GPU_max_textures_frag()) || (samplers_len > GPU_max_textures_vert())) {
+    return false;
+  }
+
+  return (samplers_len * 2 <= GPU_max_textures());
+}
+
+GPUPass *GPU_generate_pass(GPUMaterial *material,
+                           GPUNodeGraph *graph,
+                           const char *debug_name,
+                           eGPUMaterialEngine engine,
+                           bool deferred_compilation,
+                           GPUCodegenCallbackFn finalize_source_cb,
+                           void *thunk,
+                           bool optimize_graph)
+{
+  gpu_node_graph_prune_unused(graph);
+
+  /* If the optimize flag is passed in, we are generating an optimized
+   * variant of the GPUMaterial's GPUPass. */
+  if (optimize_graph) {
+    gpu_node_graph_optimize(graph);
+  }
+
+  /* Extract attributes before compiling so the generated VBOs are ready to accept the future
+   * shader. */
+  gpu_node_graph_finalize_uniform_attrs(graph);
+
+  GPUCodegen codegen(material, graph, debug_name);
+  codegen.generate_graphs();
+  codegen.generate_cryptomatte();
+
+  GPUPass *pass = nullptr;
+
+  if (!optimize_graph) {
+    /* The optimized version of the shader should not re-generate a UBO.
+     * The UBO will not be used for this variant. */
+    codegen.generate_uniform_buffer();
+  }
+
+  /* Cache lookup: reuse shaders that are already compiled. */
+  pass = g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
+
+  if (pass) {
+    pass->refcount++;
+    return pass;
+  }
+
+  /* The shader is not yet compiled, continue generating the shader strings. */
+  codegen.generate_attribs();
+  codegen.generate_resources();
+  codegen.generate_library();
+
+  /* Make the engine add its own code and implement the generated functions. */
+  finalize_source_cb(thunk, material, &codegen.output);
+
+  codegen.create_info->finalize();
+  g_cache->add(engine, codegen, deferred_compilation, optimize_graph);
+  codegen.create_info = nullptr;
+
+  return g_cache->get(engine, codegen.hash_get(), deferred_compilation, optimize_graph);
+}
+
+/** \} */
diff --git a/source/blender/gpu/intern/gpu_shader.cc b/source/blender/gpu/intern/gpu_shader.cc
index 2034177767c..69c8c6bb0d6 100644
--- a/source/blender/gpu/intern/gpu_shader.cc
+++ b/source/blender/gpu/intern/gpu_shader.cc
@@ -387,6 +387,11 @@ void GPU_shader_batch_cancel(BatchHandle &handle)
   GPUBackend::get()->get_compiler()->batch_cancel(handle);
 }
 
+void GPU_shader_batch_wait_for_all()
+{
+  GPUBackend::get()->get_compiler()->wait_for_all();
+}
+
 void GPU_shader_compile_static()
 {
   printf("Compiling all static GPU shaders. This process takes a while.\n");
@@ -1029,6 +1034,7 @@ bool ShaderCompiler::batch_is_ready(BatchHandle handle)
 Vector<GPUShader *> ShaderCompiler::batch_finalize(BatchHandle &handle)
 {
   std::unique_lock lock(mutex_);
+  /* TODO: Move to be first on the queue. */
+  compilation_finished_notification_.wait(lock,
+                                          [&]() { return batches_.lookup(handle)->is_ready(); });
@@ -1116,6 +1122,24 @@ void ShaderCompiler::run_thread()
   }
 }
 
+void ShaderCompiler::wait_for_all()
+{
+  std::unique_lock lock(mutex_);
+  compilation_finished_notification_.wait(lock, [&]() {
+    if (!compilation_queue_.empty()) {
+      return false;
+    }
+
+    for (Batch *batch : batches_.values()) {
+      if (!batch->is_ready()) {
+        return false;
+      }
+    }
+
+    return true;
+  });
+}
+
 /** \} */
 
 } // namespace blender::gpu
diff --git a/source/blender/gpu/intern/gpu_shader_private.hh b/source/blender/gpu/intern/gpu_shader_private.hh
index 07ab0f1bd88..c2d40cb4f99 100644
--- a/source/blender/gpu/intern/gpu_shader_private.hh
+++ b/source/blender/gpu/intern/gpu_shader_private.hh
@@ -223,6 +223,8 @@ class ShaderCompiler {
   SpecializationBatchHandle precompile_specializations(Span<ShaderSpecialization> specializations);
   bool specialization_batch_is_ready(SpecializationBatchHandle &handle);
+
+  void wait_for_all();
 };
 
 enum class Severity {
diff --git a/source/blender/nodes/shader/nodes/node_shader_tex_environment.cc b/source/blender/nodes/shader/nodes/node_shader_tex_environment.cc
index 7c907f54ff9..be5ea4cb144 100644
--- a/source/blender/nodes/shader/nodes/node_shader_tex_environment.cc
+++ b/source/blender/nodes/shader/nodes/node_shader_tex_environment.cc
@@ -56,9 +56,7 @@ static int node_shader_gpu_tex_environment(GPUMaterial *mat,
 
   GPUNodeLink *outalpha;
 
-  /* HACK(@fclem): For lookdev mode: do not compile an empty environment and just create an empty
-   * texture entry point. We manually bind to it after #DRW_shgroup_add_material_resources(). */
-  if (!ima && !GPU_material_flag_get(mat, GPU_MATFLAG_LOOKDEV_HACK)) {
+  if (!ima) {
     return GPU_stack_link(mat, node, "node_tex_environment_empty", in, out);
   }
diff --git a/source/blender/windowmanager/intern/wm_init_exit.cc b/source/blender/windowmanager/intern/wm_init_exit.cc
index 37d9d4480ed..ff35c44a506 100644
--- a/source/blender/windowmanager/intern/wm_init_exit.cc
+++ b/source/blender/windowmanager/intern/wm_init_exit.cc
@@ -105,7 +105,7 @@
 #include "GPU_context.hh"
 #include "GPU_init_exit.hh"
-#include "GPU_material.hh"
+#include "GPU_shader.hh"
 
 #include "COM_compositor.hh"
 
@@ -161,8 +161,6 @@ void WM_init_gpu()
   GPU_init();
 
-  GPU_pass_cache_init();
-
   if (G.debug & G_DEBUG_GPU_COMPILE_SHADERS) {
     GPU_shader_compile_static();
   }
@@ -645,7 +643,6 @@ void WM_exit_ex(bContext *C, const bool do_python_exit, const bool do_user_exit_
   if (gpu_is_init) {
     DRW_gpu_context_enable_ex(false);
     UI_exit();
-    GPU_pass_cache_free();
     GPU_shader_cache_dir_clear_old();
     GPU_exit();
     DRW_gpu_context_disable_ex(false);