EEVEE: Optimize Upfront specialization

Many of the upfront specialized variants
were not needed. They are only used if
some scene render setting changes, which
we can detect upfront.

This is noticeable on OpenGL, which doesn't support
specialization constants and has to do a full shader
recompilation for each variant.

Pull Request: https://projects.blender.org/blender/blender/pulls/138589
This commit is contained in:
Clément Foucault
2025-05-13 16:28:02 +02:00
committed by Clément Foucault
parent 4ab8267b7e
commit 90be031d6b
8 changed files with 81 additions and 33 deletions

View File

@@ -209,10 +209,13 @@ void Instance::init(const int2 &output_res,
lookdev.init(visible_rect);
shaders_are_ready_ = shaders.static_shaders_are_ready(is_image_render) &&
shaders.request_specializations(is_image_render,
render_buffers.data.shadow_id,
shadows.get_data().ray_count,
shadows.get_data().step_count);
shaders.request_specializations(
is_image_render,
render_buffers.data.shadow_id,
shadows.get_data().ray_count,
shadows.get_data().step_count,
DeferredLayer::do_split_direct_indirect_radiance(*this),
DeferredLayer::do_merge_direct_indirect_eval(*this));
skip_render_ = !shaders_are_ready_ || !film.is_valid_render_extent();
}
@@ -255,7 +258,9 @@ void Instance::init_light_bake(Depsgraph *depsgraph, draw::Manager *manager)
shaders.request_specializations(true,
render_buffers.data.shadow_id,
shadows.get_data().ray_count,
shadows.get_data().step_count);
shadows.get_data().step_count,
DeferredLayer::do_split_direct_indirect_radiance(*this),
DeferredLayer::do_merge_direct_indirect_eval(*this));
}
void Instance::set_time(float time)

View File

@@ -556,28 +556,38 @@ void DeferredLayer::begin_sync()
this->gbuffer_pass_sync(inst_);
}
/* Tells whether indirect lighting is evaluated inside the light eval pass.
 * This is the case exactly when the ray-tracing module reports that ray-tracing is not in use. */
bool DeferredLayer::do_merge_direct_indirect_eval(const Instance &inst)
{
  const bool raytracing_enabled = inst.raytracing.use_raytracing();
  return !raytracing_enabled;
}
/* Tells whether the radiance is split for the lighting pass.
 * Requires the merged direct/indirect evaluation to be active and at least one of the
 * surface clamp options (direct or indirect) to be enabled. */
bool DeferredLayer::do_split_direct_indirect_radiance(const Instance &inst)
{
  if (!do_merge_direct_indirect_eval(inst)) {
    /* Without the merged evaluation, the split is never requested by this query. */
    return false;
  }
  if (inst.sampling.use_clamp_direct()) {
    return true;
  }
  return inst.sampling.use_clamp_indirect();
}
void DeferredLayer::end_sync(bool is_first_pass,
bool is_last_pass,
bool next_layer_has_transmission)
{
const SceneEEVEE &sce_eevee = inst_.scene->eevee;
const bool has_any_closure = closure_bits_ != 0;
/* We need the feedback output in case of refraction in the next pass (see #126455). */
const bool is_layer_refracted = (next_layer_has_transmission && has_any_closure);
const bool has_transmit_closure = (closure_bits_ & (CLOSURE_REFRACTION | CLOSURE_TRANSLUCENT));
const bool has_reflect_closure = (closure_bits_ & (CLOSURE_REFLECTION | CLOSURE_DIFFUSE));
use_raytracing_ = (has_transmit_closure || has_reflect_closure) &&
(sce_eevee.flag & SCE_EEVEE_SSR_ENABLED) != 0;
use_clamp_direct_ = sce_eevee.clamp_surface_direct != 0.0f;
use_clamp_indirect_ = sce_eevee.clamp_surface_indirect != 0.0f;
inst_.raytracing.use_raytracing();
use_clamp_direct_ = inst_.sampling.use_clamp_direct();
use_clamp_indirect_ = inst_.sampling.use_clamp_indirect();
/* Is the radiance split for the combined pass. */
use_split_radiance_ = use_raytracing_ || (use_clamp_direct_ || use_clamp_indirect_);
/* The first pass will never have any surfaces behind it. Nothing is refracted except the
* environment. So in this case, disable tracing and fallback to probe. */
use_screen_transmission_ = use_raytracing_ && has_transmit_closure && !is_first_pass;
use_screen_reflection_ = use_raytracing_ && has_reflect_closure;
use_split_radiance_ = use_raytracing_ || (use_clamp_direct_ || use_clamp_indirect_);
use_feedback_output_ = (use_raytracing_ || is_layer_refracted) &&
(!is_last_pass || use_screen_reflection_);
@@ -651,7 +661,8 @@ void DeferredLayer::end_sync(bool is_first_pass,
}
{
const bool use_transmission = (closure_bits_ & CLOSURE_TRANSMISSION) != 0;
const bool use_split_indirect = !use_raytracing_ && use_split_radiance_;
const bool use_split_indirect = do_split_direct_indirect_radiance(inst_);
const bool use_lightprobe_eval = do_merge_direct_indirect_eval(inst_);
PassSimple::Sub &sub = pass.sub("Eval.Light");
/* Use depth test to reject background pixels which have not been stencil cleared. */
/* WORKAROUND: Avoid rasterizer discard by enabling stencil write, but the shaders actually
@@ -664,12 +675,9 @@ void DeferredLayer::end_sync(bool is_first_pass,
* See page 78 of "SIGGRAPH 2023: Unreal Engine Substrate" by Hillaire & de Rousiers. */
for (int i = min_ii(3, closure_count_) - 1; i >= 0; i--) {
GPUShader *sh = inst_.shaders.static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i));
/* TODO(fclem): Could specialize directly with the pass index but this would break it for
* OpenGL and Vulkan implementation which aren't fully supporting the specialize
* constant. */
sub.specialize_constant(sh, "render_pass_shadow_id", rbuf_data.shadow_id);
sub.specialize_constant(sh, "use_split_indirect", use_split_indirect);
sub.specialize_constant(sh, "use_lightprobe_eval", !use_raytracing_);
sub.specialize_constant(sh, "use_lightprobe_eval", use_lightprobe_eval);
sub.specialize_constant(sh, "use_transmission", false);
const ShadowSceneData &shadow_scene = inst_.shadows.get_data();
sub.specialize_constant(sh, "shadow_ray_count", &shadow_scene.ray_count);

View File

@@ -332,6 +332,11 @@ class DeferredLayer : DeferredLayerBase {
return closure_bits_ & CLOSURE_TRANSMISSION;
}
/* Do we compute indirect lighting inside the light eval pass. */
static bool do_merge_direct_indirect_eval(const Instance &inst);
/* Is the radiance split for the lighting pass. */
static bool do_split_direct_indirect_radiance(const Instance &inst);
/* Returns the radiance buffer to feed the next layer. */
GPUTexture *render(View &main_view,
View &render_view,

View File

@@ -35,6 +35,8 @@ void RayTraceModule::init()
fast_gi_step_count_ = sce_eevee.fast_gi_step_count;
fast_gi_ao_only_ = (sce_eevee.fast_gi_method == FAST_GI_AO_ONLY);
use_raytracing_ = (sce_eevee.flag & SCE_EEVEE_SSR_ENABLED) != 0;
float4 data(0.0f);
radiance_dummy_black_tx_.ensure_2d(
RAYTRACE_RADIANCE_FORMAT, int2(1), GPU_TEXTURE_USAGE_SHADER_READ, data);

View File

@@ -224,7 +224,9 @@ class RayTraceModule {
RaytraceEEVEE ray_tracing_options_;
int fast_gi_ray_count_ = 0;
int fast_gi_step_count_ = 0;
bool fast_gi_ao_only_ = 0;
bool fast_gi_ao_only_ = false;
bool use_raytracing_ = false;
RaytraceEEVEE_Method tracing_method_ = RAYTRACE_EEVEE_METHOD_PROBE;
@@ -273,6 +275,11 @@ class RayTraceModule {
void debug_pass_sync();
void debug_draw(View &view, GPUFrameBuffer *view_fb);
/* Returns the cached `use_raytracing_` flag of this module. */
bool use_raytracing() const
{
return use_raytracing_;
}
private:
RayTraceResultTexture trace(int closure_index,
bool active_layer,

View File

@@ -142,6 +142,16 @@ class Sampling {
return sample_;
}
/* Returns true when the direct surface clamp value is non-zero. */
bool use_clamp_direct() const
{
return clamp_data_.surface_direct != 0.0f;
}
/* Returns true when the indirect surface clamp value is non-zero. */
bool use_clamp_indirect() const
{
return clamp_data_.surface_indirect != 0.0f;
}
/* Return true if we are starting a new motion blur step. We need to run sync again since
* depsgraph was updated by MotionBlur::step(). */
bool do_render_sync() const

View File

@@ -117,7 +117,9 @@ bool ShaderModule::static_shaders_are_ready(bool block_until_ready)
bool ShaderModule::request_specializations(bool block_until_ready,
int render_buffers_shadow_id,
int shadow_ray_count,
int shadow_ray_step_count)
int shadow_ray_step_count,
bool use_split_indirect,
bool use_lightprobe_eval)
{
if (!GPU_use_parallel_compilation()) {
return true;
@@ -128,22 +130,23 @@ bool ShaderModule::request_specializations(bool block_until_ready,
std::lock_guard lock(mutex_);
SpecializationBatchHandle &specialization_handle = specialization_handles_.lookup_or_add_cb(
{render_buffers_shadow_id, shadow_ray_count, shadow_ray_step_count}, [&]() {
{render_buffers_shadow_id,
shadow_ray_count,
shadow_ray_step_count,
use_split_indirect,
use_lightprobe_eval},
[&]() {
Vector<ShaderSpecialization> specializations;
for (int i = 0; i < 3; i++) {
GPUShader *sh = static_shader_get(eShaderType(DEFERRED_LIGHT_SINGLE + i));
for (bool use_split_indirect : {false, true}) {
for (bool use_lightprobe_eval : {false, true}) {
for (bool use_transmission : {false, true}) {
specializations.append({sh,
{{"render_pass_shadow_id", render_buffers_shadow_id},
{"use_split_indirect", use_split_indirect},
{"use_lightprobe_eval", use_lightprobe_eval},
{"use_transmission", use_transmission},
{"shadow_ray_count", shadow_ray_count},
{"shadow_ray_step_count", shadow_ray_step_count}}});
}
}
for (bool use_transmission : {false, true}) {
specializations.append({sh,
{{"render_pass_shadow_id", render_buffers_shadow_id},
{"use_split_indirect", use_split_indirect},
{"use_lightprobe_eval", use_lightprobe_eval},
{"use_transmission", use_transmission},
{"shadow_ray_count", shadow_ray_count},
{"shadow_ray_step_count", shadow_ray_step_count}}});
}
}

View File

@@ -181,14 +181,20 @@ class ShaderModule {
public:
/* Builds the key by packing every specialization parameter into `hash_value_`.
 * Layout, from most- to least-significant bits:
 * - `render_buffers_shadow_id + 1` (remaining upper bits),
 * - `shadow_ray_count - 1` (2 bits),
 * - `shadow_ray_step_count - 1` (4 bits),
 * - `use_split_indirect` (1 bit),
 * - `use_lightprobe_eval` (1 bit). */
SpecializationsKey(int render_buffers_shadow_id,
int shadow_ray_count,
int shadow_ray_step_count)
int shadow_ray_step_count,
bool use_split_indirect,
bool use_lightprobe_eval)
{
/* The asserted ranges guarantee that each biased value fits in the bit-field reserved below. */
BLI_assert(render_buffers_shadow_id >= -1);
BLI_assert(shadow_ray_count >= 1 && shadow_ray_count <= 4);
BLI_assert(shadow_ray_step_count >= 1 && shadow_ray_step_count <= 16);
/* NOTE(review): A `bool` converted to `uint64_t` is always 0 or 1, so these two asserts cannot
 * fail; they only document the 1 bit width used for each flag. */
BLI_assert(uint64_t(use_split_indirect) >= 0 && uint64_t(use_split_indirect) <= 1);
BLI_assert(uint64_t(use_lightprobe_eval) >= 0 && uint64_t(use_lightprobe_eval) <= 1);
/* Bias the shadow id by one so that the allowed minimum of -1 maps to 0. */
hash_value_ = render_buffers_shadow_id + 1;
hash_value_ = (hash_value_ << 2) | (shadow_ray_count - 1);
hash_value_ = (hash_value_ << 4) | (shadow_ray_step_count - 1);
hash_value_ = (hash_value_ << 1) | uint64_t(use_split_indirect);
hash_value_ = (hash_value_ << 1) | uint64_t(use_lightprobe_eval);
}
uint64_t hash() const
@@ -224,7 +230,9 @@ class ShaderModule {
bool request_specializations(bool block_until_ready,
int render_buffers_shadow_id,
int shadow_ray_count,
int shadow_ray_step_count);
int shadow_ray_step_count,
bool use_split_indirect,
bool use_lightprobe_eval);
GPUShader *static_shader_get(eShaderType shader_type);
GPUMaterial *material_default_shader_get(eMaterialPipeline pipeline_type,