From c760248fc5aad57bafbf70642bc4c2423b2a9f61 Mon Sep 17 00:00:00 2001 From: Jason Fielder Date: Mon, 13 Feb 2023 15:47:21 +0000 Subject: [PATCH] Metal: Improve AMD EEVEE Performance Complex EEVEE nodegraphs, particularly those combining multiple principledBSDF shader nodes, have a tendency to require a large number of simultaneous live registers due to function call depth. In some instances, this causes a substantial performance drop and corruption if the stack gets too large. To mitigate this, splitting calls to closure_eval such that only a single individual closure is evaluated in each call reduces the number of live registers required. This is preferred over using compound closure evaluation functions which require a large amount of in-flight data. Note that this is generally not more optimal, if the stack does not spill, as there is an increased instruction count. The specific trade-off depends on the exact architecture in question. Hence, this is limited to AMD GPUs. Authored by Apple: Michael Parkin-White Ref #96261 Pull Request #104985 --- .../shaders/closure_eval_surface_lib.glsl | 54 ++++++++++++++++++- .../infos/engine_eevee_legacy_shared.h | 8 +++ 2 files changed, 60 insertions(+), 2 deletions(-) diff --git a/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl b/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl index ffca97b6b8f..8bfc231f947 100644 --- a/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl +++ b/source/blender/draw/engines/eevee/shaders/closure_eval_surface_lib.glsl @@ -102,7 +102,7 @@ Closure closure_eval(ClosureTranslucent translucent) } CLOSURE_EVAL_FUNCTION_DECLARE_1(GlossyBSDF, Glossy) -Closure closure_eval(ClosureReflection reflection) +Closure closure_eval(ClosureReflection reflection, const bool do_output_ssr) { /* Glue with the old system. 
*/ CLOSURE_VARS_DECLARE_1(Glossy); @@ -113,12 +113,22 @@ Closure closure_eval(ClosureReflection reflection) CLOSURE_EVAL_FUNCTION_1(GlossyBSDF, Glossy); Closure closure = CLOSURE_DEFAULT; - if (!output_ssr(reflection)) { + + bool output_radiance = true; + if (do_output_ssr) { + output_radiance = !output_ssr(reflection); + } + if (output_radiance) { closure.radiance += out_Glossy_0.radiance * reflection.color * reflection.weight; } return closure; } +Closure closure_eval(ClosureReflection reflection) +{ + return closure_eval(reflection, true); +} + CLOSURE_EVAL_FUNCTION_DECLARE_1(RefractionBSDF, Refraction) Closure closure_eval(ClosureRefraction refraction) { @@ -155,6 +165,13 @@ Closure closure_eval(ClosureTransparency transparency) CLOSURE_EVAL_FUNCTION_DECLARE_2(GlassBSDF, Glossy, Refraction) Closure closure_eval(ClosureReflection reflection, ClosureRefraction refraction) { + +#if defined(DO_SPLIT_CLOSURE_EVAL) + Closure closure = closure_eval(refraction); + Closure closure_reflection = closure_eval(reflection); + closure.radiance += closure_reflection.radiance; + return closure; +#else /* Glue with the old system. */ CLOSURE_VARS_DECLARE_2(Glossy, Refraction); @@ -172,12 +189,19 @@ Closure closure_eval(ClosureReflection reflection, ClosureRefraction refraction) closure.radiance += out_Glossy_0.radiance * reflection.color * reflection.weight; } return closure; +#endif } /* Dielectric BSDF */ CLOSURE_EVAL_FUNCTION_DECLARE_2(DielectricBSDF, Diffuse, Glossy) Closure closure_eval(ClosureDiffuse diffuse, ClosureReflection reflection) { +#if defined(DO_SPLIT_CLOSURE_EVAL) + Closure closure = closure_eval(diffuse); + Closure closure_reflection = closure_eval(reflection); + closure.radiance += closure_reflection.radiance; + return closure; +#else /* Glue with the old system. 
*/ CLOSURE_VARS_DECLARE_2(Diffuse, Glossy); @@ -198,6 +222,7 @@ Closure closure_eval(ClosureDiffuse diffuse, ClosureReflection reflection) closure.radiance += out_Glossy_1.radiance * reflection.color * reflection.weight; } return closure; +#endif } /* Specular BSDF */ @@ -206,6 +231,13 @@ Closure closure_eval(ClosureDiffuse diffuse, ClosureReflection reflection, ClosureReflection clearcoat) { +#if defined(DO_SPLIT_CLOSURE_EVAL) + Closure closure = closure_eval(diffuse); + Closure closure_reflection = closure_eval(reflection); + Closure closure_clearcoat = closure_eval(clearcoat, false); + closure.radiance += closure_reflection.radiance + closure_clearcoat.radiance; + return closure; +#else /* Glue with the old system. */ CLOSURE_VARS_DECLARE_3(Diffuse, Glossy, Glossy); @@ -229,6 +261,7 @@ Closure closure_eval(ClosureDiffuse diffuse, closure.radiance += out_Glossy_1.radiance * reflection.color * reflection.weight; } return closure; +#endif } /* Principled BSDF */ @@ -238,6 +271,15 @@ Closure closure_eval(ClosureDiffuse diffuse, ClosureReflection clearcoat, ClosureRefraction refraction) { +#if defined(DO_SPLIT_CLOSURE_EVAL) + Closure closure = closure_eval(diffuse); + Closure closure_reflection = closure_eval(reflection); + Closure closure_clearcoat = closure_eval(clearcoat, false); + Closure closure_refraction = closure_eval(refraction); + closure.radiance += closure_reflection.radiance + closure_clearcoat.radiance + + closure_refraction.radiance; + return closure; +#else /* Glue with the old system. 
*/ CLOSURE_VARS_DECLARE_4(Diffuse, Glossy, Glossy, Refraction); @@ -263,11 +305,18 @@ Closure closure_eval(ClosureDiffuse diffuse, closure.radiance += out_Glossy_1.radiance * reflection.color * reflection.weight; } return closure; +#endif } CLOSURE_EVAL_FUNCTION_DECLARE_2(PrincipledBSDFMetalClearCoat, Glossy, Glossy) Closure closure_eval(ClosureReflection reflection, ClosureReflection clearcoat) { +#if defined(DO_SPLIT_CLOSURE_EVAL) + Closure closure = closure_eval(clearcoat); + Closure closure_reflection = closure_eval(reflection); + closure.radiance += closure_reflection.radiance; + return closure; +#else /* Glue with the old system. */ CLOSURE_VARS_DECLARE_2(Glossy, Glossy); @@ -284,6 +333,7 @@ Closure closure_eval(ClosureReflection reflection, ClosureReflection clearcoat) closure.radiance += out_Glossy_0.radiance * reflection.color * reflection.weight; } return closure; +#endif } /* Not supported for surface shaders. */ diff --git a/source/blender/draw/engines/eevee/shaders/infos/engine_eevee_legacy_shared.h b/source/blender/draw/engines/eevee/shaders/infos/engine_eevee_legacy_shared.h index 1170078d9a5..cb06ee74ec7 100644 --- a/source/blender/draw/engines/eevee/shaders/infos/engine_eevee_legacy_shared.h +++ b/source/blender/draw/engines/eevee/shaders/infos/engine_eevee_legacy_shared.h @@ -16,6 +16,14 @@ typedef struct CommonUniformBlock CommonUniformBlock; # endif #endif +/* NOTE: AMD-based macOS platforms experience performance and correctness issues with EEVEE + * material closure evaluation. Using singular closure evaluation, rather than the compound + * function calls reduces register overflow, by limiting the simultaneous number of live + * registers used by the virtual GPU function stack. */ +#if (defined(GPU_METAL) && defined(GPU_ATI)) +# define DO_SPLIT_CLOSURE_EVAL 1 +#endif + struct CommonUniformBlock { mat4 pastViewProjectionMatrix; vec4 hizUvScale; /* To correct mip level texel misalignment */