Metal: GLSL shader compatibility 3rd pass
Undefined behaviour for divergent control-flow fixes, replacement for partial vector references, and resolution of a number of calculation precision issues occurring on macOS. Authored by Apple: Michael Parkin-White Ref: T96261 Reviewed By: fclem Differential Revision: https://developer.blender.org/D14437
This commit is contained in:
committed by
Clément Foucault
parent
d62f443f2d
commit
b0dc3aff2c
@@ -15,7 +15,6 @@ uniform float visibilityRange;
|
||||
uniform float visibilityBlur;
|
||||
|
||||
uniform float sampleCount;
|
||||
uniform float;
|
||||
|
||||
out vec4 FragColor;
|
||||
|
||||
|
||||
@@ -44,8 +44,13 @@ void main()
|
||||
volumeObjectLocalCoord = (volumeObjectToTexture * vec4(volumeObjectLocalCoord, 1.0)).xyz;
|
||||
|
||||
if (any(lessThan(volumeObjectLocalCoord, vec3(0.0))) ||
|
||||
any(greaterThan(volumeObjectLocalCoord, vec3(1.0))))
|
||||
any(greaterThan(volumeObjectLocalCoord, vec3(1.0)))) {
|
||||
/* Note: Discard is not an explicit return in Metal prior to version 2.3.
|
||||
* adding return after discard ensures consistent behaviour and avoids GPU
|
||||
* side-effects where control flow continues with undefined values. */
|
||||
discard;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef CLEAR
|
||||
|
||||
@@ -70,7 +70,11 @@ void main()
|
||||
vec3 Tr = exp(-s_extinction * s_len);
|
||||
|
||||
/* integrate along the current step segment */
|
||||
Lscat = (Lscat - Lscat * Tr) / max(vec3(1e-8), s_extinction);
|
||||
/* Note: Original calculation carries precision issues when compiling for AMD GPUs
|
||||
* and running Metal. This version of the equation retains precision well for all
|
||||
* macOS HW configurations. */
|
||||
Lscat = (Lscat * (1.0f - Tr)) / max(vec3(1e-8), s_extinction);
|
||||
|
||||
/* accumulate and also take into account the transmittance from previous steps */
|
||||
finalScattering += finalTransmittance * Lscat;
|
||||
|
||||
|
||||
@@ -358,6 +358,12 @@ void main()
|
||||
line_end = vec2(0.0, 0.5);
|
||||
break;
|
||||
default:
|
||||
/* Ensure values are assigned to, avoids undefined behaviour for
|
||||
* divergent control-flow. This can occur if discard is called
|
||||
* as discard is not treated as a return in Metal 2.2. So
|
||||
* side-effects can still cause problems. */
|
||||
line_start = vec2(0.0);
|
||||
line_end = vec2(0.0);
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -8,8 +8,10 @@
|
||||
/* From http://libnoise.sourceforge.net/noisegen/index.html */
|
||||
float integer_noise(int n)
|
||||
{
|
||||
n = (n >> 13) ^ n;
|
||||
int nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff;
|
||||
/* Integer bit-shifts cause precision issues due to overflow
|
||||
* in a number of workbench tests. Use uint instead. */
|
||||
uint nn = (uint(n) >> 13u) ^ uint(n);
|
||||
nn = (nn * (nn * nn * 60493u + 19990303u) + 1376312589u) & 0x7fffffffu;
|
||||
return (float(nn) / 1073741824.0);
|
||||
}
|
||||
|
||||
|
||||
@@ -218,7 +218,15 @@ void main()
|
||||
/* Manual depth test. TODO: remove. */
|
||||
float depth = texelFetch(depthBuffer, ivec2(gl_FragCoord.xy), 0).r;
|
||||
if (gl_FragCoord.z >= depth) {
|
||||
/* Note: In the Metal API, prior to Metal 2.3, Discard is not an explicit return and can
|
||||
* produce undefined behaviour. This is especially prominent with derivatives if control-flow
|
||||
* divergence is present.
|
||||
*
|
||||
* Adding a return call eliminates undefined behaviour and a later out-of-bounds read causing
|
||||
* a crash on AMD platforms.
|
||||
* This behaviour can also affect OpenGL on certain devices. */
|
||||
discard;
|
||||
return;
|
||||
}
|
||||
|
||||
vec3 Lscat;
|
||||
@@ -268,6 +276,7 @@ void main()
|
||||
/* Start is further away than the end.
|
||||
* That means no volume is intersected. */
|
||||
discard;
|
||||
return;
|
||||
}
|
||||
|
||||
fragColor = volume_integration(ls_ray_ori,
|
||||
|
||||
@@ -64,22 +64,35 @@ vec3 get_world_lighting(vec3 base_color, float roughness, float metallic, vec3 N
|
||||
if (world_data.use_specular) {
|
||||
/* Prepare Specular computation. Eval 4 lights at once. */
|
||||
vec3 R = -reflect(I, N);
|
||||
|
||||
#ifdef GPU_METAL
|
||||
/* Split vectors into arrays of floats. Partial vector references are unsupported by MSL. */
|
||||
float spec_angle[4], spec_NL[4], wrap_NL[4];
|
||||
# define AS_VEC4(a) vec4(a[0], a[1], a[2], a[3])
|
||||
#else
|
||||
vec4 spec_angle, spec_NL, wrap_NL;
|
||||
prep_specular(world_data.lights[0].direction.xyz, I, N, R, spec_NL.x, wrap_NL.x, spec_angle.x);
|
||||
prep_specular(world_data.lights[1].direction.xyz, I, N, R, spec_NL.y, wrap_NL.y, spec_angle.y);
|
||||
prep_specular(world_data.lights[2].direction.xyz, I, N, R, spec_NL.z, wrap_NL.z, spec_angle.z);
|
||||
prep_specular(world_data.lights[3].direction.xyz, I, N, R, spec_NL.w, wrap_NL.w, spec_angle.w);
|
||||
# define AS_VEC4(a) a
|
||||
#endif
|
||||
prep_specular(
|
||||
world_data.lights[0].direction.xyz, I, N, R, spec_NL[0], wrap_NL[0], spec_angle[0]);
|
||||
prep_specular(
|
||||
world_data.lights[1].direction.xyz, I, N, R, spec_NL[1], wrap_NL[1], spec_angle[1]);
|
||||
prep_specular(
|
||||
world_data.lights[2].direction.xyz, I, N, R, spec_NL[2], wrap_NL[2], spec_angle[2]);
|
||||
prep_specular(
|
||||
world_data.lights[3].direction.xyz, I, N, R, spec_NL[3], wrap_NL[3], spec_angle[3]);
|
||||
|
||||
vec4 gloss = vec4(1.0 - roughness);
|
||||
/* Reduce gloss for smooth light. (simulate bigger light) */
|
||||
gloss *= 1.0 - wrap;
|
||||
vec4 shininess = exp2(10.0 * gloss + 1.0);
|
||||
|
||||
vec4 spec_light = blinn_specular(shininess, spec_angle, spec_NL);
|
||||
vec4 spec_light = blinn_specular(shininess, AS_VEC4(spec_angle), AS_VEC4(spec_NL));
|
||||
|
||||
/* Simulate Env. light. */
|
||||
vec4 w = mix(wrap, vec4(1.0), roughness);
|
||||
vec4 spec_env = wrapped_lighting(wrap_NL, w);
|
||||
vec4 spec_env = wrapped_lighting(AS_VEC4(wrap_NL), w);
|
||||
#undef AS_VEC4
|
||||
|
||||
spec_light = mix(spec_light, spec_env, wrap * wrap);
|
||||
|
||||
|
||||
@@ -211,6 +211,11 @@ void hair_get_pos_tan_binor_time(bool is_persp,
|
||||
|
||||
wpos += wbinor * thick_time * scale;
|
||||
}
|
||||
else {
|
||||
/* Note: Ensures 'hairThickTime' is initialised -
|
||||
* avoids undefined behaviour on certain macOS configurations. */
|
||||
thick_time = 0.0;
|
||||
}
|
||||
}
|
||||
|
||||
float hair_get_customdata_float(const samplerBuffer cd_buf)
|
||||
|
||||
@@ -18,7 +18,7 @@ void main(void)
|
||||
vec4 weights = hair_get_weights_cardinal(interp_time);
|
||||
finalColor = hair_interp_data(data0, data1, data2, data3, weights);
|
||||
|
||||
#ifdef TF_WORKAROUND
|
||||
#if defined(TF_WORKAROUND)
|
||||
int id = gl_VertexID - idOffset;
|
||||
gl_Position.x = ((float(id % targetWidth) + 0.5) / float(targetWidth)) * 2.0 - 1.0;
|
||||
gl_Position.y = ((float(id / targetWidth) + 0.5) / float(targetHeight)) * 2.0 - 1.0;
|
||||
@@ -26,5 +26,10 @@ void main(void)
|
||||
gl_Position.w = 1.0;
|
||||
|
||||
gl_PointSize = 1.0;
|
||||
#else
|
||||
# ifdef GPU_METAL
|
||||
/* Metal still expects an output position for TF shaders. */
|
||||
gl_Position = vec4(0.0, 0.0, 0.0, 1.0);
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -569,7 +569,7 @@ SamplerState PointSampler
|
||||
# define SMAAGather(tex, coord) tex.Gather(LinearSampler, coord, 0)
|
||||
# endif
|
||||
#endif
|
||||
#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4)
|
||||
#if defined(SMAA_GLSL_3) || defined(SMAA_GLSL_4) || defined(GPU_METAL)
|
||||
# define SMAATexture2D(tex) sampler2D tex
|
||||
# define SMAATexturePass2D(tex) tex
|
||||
# define SMAASampleLevelZero(tex, coord) textureLod(tex, coord, 0.0)
|
||||
@@ -641,14 +641,14 @@ float2 SMAACalculatePredicatedThreshold(float2 texcoord,
|
||||
*/
|
||||
void SMAAMovc(bool2 cond, inout float2 variable, float2 value)
|
||||
{
|
||||
SMAA_FLATTEN if (cond.x) variable.x = value.x;
|
||||
SMAA_FLATTEN if (cond.y) variable.y = value.y;
|
||||
/* Use select function (select(genType A, genType B, genBType cond)). */
|
||||
variable = select(variable, value, cond);
|
||||
}
|
||||
|
||||
void SMAAMovc(bool4 cond, inout float4 variable, float4 value)
|
||||
{
|
||||
SMAAMovc(cond.xy, variable.xy, value.xy);
|
||||
SMAAMovc(cond.zw, variable.zw, value.zw);
|
||||
/* Use select function (select(genType A, genType B, genBType cond)). */
|
||||
variable = select(variable, value, cond);
|
||||
}
|
||||
|
||||
#if SMAA_INCLUDE_VS
|
||||
@@ -1281,7 +1281,15 @@ float4 SMAABlendingWeightCalculationPS(float2 texcoord,
|
||||
|
||||
// Fix corners:
|
||||
coords.y = texcoord.y;
|
||||
SMAADetectHorizontalCornerPattern(SMAATexturePass2D(edgesTex), weights.rg, coords.xyzy, d);
|
||||
|
||||
# ifdef GPU_METAL
|
||||
/* Partial vector references are unsupported in MSL. */
|
||||
vec2 _weights = weights.rg;
|
||||
SMAADetectHorizontalCornerPattern(SMAATexturePass2D(edgesTex), _weights, coords.xyzy, d);
|
||||
weights.rg = _weights;
|
||||
# else
|
||||
SMAADetectHorizontalCornerPattern(SMAATexturePass2D(edgesTex), weights.rg, coords.xyzy, d);
|
||||
# endif
|
||||
|
||||
# if !defined(SMAA_DISABLE_DIAG_DETECTION)
|
||||
}
|
||||
@@ -1324,7 +1332,15 @@ float4 SMAABlendingWeightCalculationPS(float2 texcoord,
|
||||
|
||||
// Fix corners:
|
||||
coords.x = texcoord.x;
|
||||
|
||||
# ifdef GPU_METAL
|
||||
/* Partial vector references are unsupported in MSL. */
|
||||
vec2 _weights = weights.ba;
|
||||
SMAADetectVerticalCornerPattern(SMAATexturePass2D(edgesTex), _weights, coords.xyxz, d);
|
||||
weights.ba = _weights;
|
||||
# else
|
||||
SMAADetectVerticalCornerPattern(SMAATexturePass2D(edgesTex), weights.ba, coords.xyxz, d);
|
||||
# endif
|
||||
}
|
||||
|
||||
return weights;
|
||||
|
||||
@@ -209,10 +209,12 @@ vec3 hash_vec4_to_vec3(vec4 k)
|
||||
|
||||
float integer_noise(int n)
|
||||
{
|
||||
int nn;
|
||||
n = (n + 1013) & 0x7fffffff;
|
||||
n = (n >> 13) ^ n;
|
||||
nn = (n * (n * n * 60493 + 19990303) + 1376312589) & 0x7fffffff;
|
||||
/* Integer bit-shifts for these calculations can cause precision problems on macOS.
|
||||
* Using uint resolves these issues. */
|
||||
uint nn;
|
||||
nn = (uint(n) + 1013u) & 0x7fffffffu;
|
||||
nn = (nn >> 13u) ^ nn;
|
||||
nn = (uint(nn * (nn * nn * 60493u + 19990303u)) + 1376312589u) & 0x7fffffffu;
|
||||
return 0.5 * (float(nn) / 1073741824.0);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user