GPU: Shader: Add wrapper to stage agnostic function

This avoids having to guard functions that are
only available in the fragment shader stage.

Calling the function inside another stage is still
invalid and will yield a compile error on Metal.

The Vulkan and OpenGL GLSL patches need to be modified
per stage to allow the fragment-specific functions
to be defined.

This is not yet widely used, but a good example is
the change in `film_display_depth_amend`.

Rel #137261

Pull Request: https://projects.blender.org/blender/blender/pulls/138280
This commit is contained in:
Clément Foucault
2025-05-05 09:59:00 +02:00
committed by Clément Foucault
parent 92ac9f3b25
commit 8dee08996e
54 changed files with 238 additions and 111 deletions

View File

@@ -23,7 +23,7 @@ void main()
GBufferReader gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_normal_tx, texel);
if (gbuf.closure_count == 0) {
discard;
gpu_discard_fragment();
return;
}

View File

@@ -42,7 +42,7 @@ void main()
/* Display surfels as circles. */
if (distance(P, surfel.position) > debug_surfel_radius) {
discard;
gpu_discard_fragment();
return;
}
}

View File

@@ -28,7 +28,7 @@ void main()
* Discard fragments that do not have a number of closure whose bit-pattern
* overlap the current stencil un-masked bit. */
if ((current_bit & (closure_count | has_transmission)) == 0) {
discard;
gpu_discard_fragment();
return;
}
#endif

View File

@@ -44,7 +44,7 @@ void main()
shapes = saturate(1.0f - linearstep(-0.8f, 0.8f, shapes));
/* Outside of bokeh shape. Try to avoid overloading ROPs. */
if (reduce_max(shapes) == 0.0f) {
discard;
gpu_discard_fragment();
return;
}

View File

@@ -15,7 +15,7 @@ void main()
/* Discard outside the circle. */
if (dist_sqr > 1.0f) {
discard;
gpu_discard_fragment();
return;
}

View File

@@ -16,7 +16,7 @@ void main()
/* Discard outside the circle. */
if (dist_sqr > 1.0f) {
discard;
gpu_discard_fragment();
return;
}

View File

@@ -48,6 +48,6 @@ void main()
float out_depth = imageLoadFast(depth_img, texel).r;
out_depth = drw_depth_view_to_screen(-out_depth);
out_depth += 2.4e-7f * 4.0f + fwidth(out_depth);
out_depth += 2.4e-7f * 4.0f + gpu_fwidth(out_depth);
gl_FragDepth = saturate(out_depth);
}

View File

@@ -630,9 +630,7 @@ float film_display_depth_amend(int2 texel, float depth)
* twice. One for X and one for Y direction. */
/* TODO(fclem): This could be improved as it gives flickering result at depth discontinuity.
* But this is the quickest stable result I could come with for now. */
#ifdef GPU_FRAGMENT_SHADER
depth += fwidth(depth);
#endif
depth += gpu_fwidth(depth);
/* Small offset to avoid depth test lessEqual failing because of all the conversions loss. */
depth += 2.4e-7f * 4.0f;
return saturate(depth);

View File

@@ -13,7 +13,7 @@ void main()
float distance_from_center = distance(uv_coord.xy, float2(0.5f));
if (distance_from_center > 0.5f) {
discard;
gpu_discard_fragment();
return;
}
float smooth_size = texel_size.x * 1.5f;

View File

@@ -621,7 +621,7 @@ float2 bsdf_lut(float cos_theta, float roughness, float ior, bool do_multiscatte
/* Return new shading normal. */
float3 displacement_bump()
{
# if defined(GPU_FRAGMENT_SHADER) && !defined(MAT_GEOM_CURVES)
# if !defined(MAT_GEOM_CURVES)
/* This is the filter width for automatic displacement + bump mapping, which is fixed.
* NOTE: keep the same as default bump node filter width. */
constexpr float bump_filter_width = 0.1f;
@@ -629,8 +629,8 @@ float3 displacement_bump()
float2 dHd;
dF_branch(dot(nodetree_displacement(), g_data.N + dF_impl(g_data.N)), bump_filter_width, dHd);
float3 dPdx = dFdx(g_data.P);
float3 dPdy = dFdy(g_data.P);
float3 dPdx = gpu_dfdx(g_data.P);
float3 dPdy = gpu_dfdy(g_data.P);
/* Get surface tangents from normal. */
float3 Rx = cross(dPdy, g_data.N);

View File

@@ -50,7 +50,7 @@ void main()
float transparency = average(g_transmittance);
if (transparency > threshold) {
discard;
gpu_discard_fragment();
return;
}
#endif
@@ -60,7 +60,7 @@ void main()
* This would in turn create a discrepancy between the pre-pass depth and the G-buffer depth
* which exhibits missing pixels data. */
if (clip_interp.clip_distance > 0.0f) {
discard;
gpu_discard_fragment();
return;
}
#endif

View File

@@ -18,8 +18,8 @@ SHADER_LIBRARY_CREATE_INFO(eevee_geom_mesh)
#if defined(USE_BARYCENTRICS) && defined(GPU_FRAGMENT_SHADER) && defined(MAT_GEOM_MESH)
float3 barycentric_distances_get()
{
  /* Ratio between the world-space position delta and the barycentric-coordinate delta across
   * neighboring fragments, i.e. how fast the barycentric coordinates change in world units. */
  float wp_delta = length(gpu_dfdx(interp.P)) + length(gpu_dfdy(interp.P));
  float bc_delta = length(gpu_dfdx(gpu_BaryCoord)) + length(gpu_dfdy(gpu_BaryCoord));
  float rate_of_change = wp_delta / bc_delta;
  /* Scale by the distance to each opposite edge in barycentric space. */
  return rate_of_change * (1.0f - gpu_BaryCoord);
}
@@ -107,7 +107,7 @@ void init_globals()
#ifdef GPU_FRAGMENT_SHADER
g_data.N = (FrontFacing) ? g_data.N : -g_data.N;
g_data.Ni = (FrontFacing) ? g_data.Ni : -g_data.Ni;
g_data.Ng = safe_normalize(cross(dFdx(g_data.P), dFdy(g_data.P)));
g_data.Ng = safe_normalize(cross(gpu_dfdx(g_data.P), gpu_dfdy(g_data.P)));
#endif
#if defined(MAT_GEOM_MESH)

View File

@@ -43,7 +43,7 @@ void main()
ndc_depth = 1.0f;
#else
# define discard_result \
discard; \
gpu_discard_fragment(); \
return;
#endif

View File

@@ -20,7 +20,7 @@ float transparency_hash_3d(float3 a)
float transparency_hashed_alpha_threshold(float hash_scale, float hash_offset, float3 P)
{
/* Find the discretized derivatives of our coordinates. */
float max_deriv = max(length(dFdx(P)), length(dFdy(P)));
float max_deriv = max(length(gpu_dfdx(P)), length(gpu_dfdy(P)));
float pix_scale = 1.0f / (hash_scale * max_deriv);
/* Find two nearest log-discretized noise scales. */
float pix_scale_log = log2(pix_scale);

View File

@@ -16,7 +16,7 @@ void main()
out_edges = max(out_edges, SMAALumaEdgeDetectionPS(uvs, offset, reveal_tx));
/* Discard if there is no edge. */
if (dot(out_edges, float2(1.0f, 1.0f)) == 0.0f) {
discard;
gpu_discard_fragment();
return;
}

View File

@@ -102,7 +102,7 @@ void main()
revealColor = float4(0.0f, 0.0f, 0.0f, frag_color.a);
if (frag_color.a < 0.001f) {
discard;
gpu_discard_fragment();
return;
}
}
@@ -113,7 +113,7 @@ void main()
/* Manual depth test */
float scene_depth = texture(gp_scene_depth_tx, uvs).r;
if (gl_FragCoord.z > scene_depth) {
discard;
gpu_discard_fragment();
return;
}
@@ -121,7 +121,7 @@ void main()
* depth written where the mask obliterate the layer. */
float mask = texture(gp_mask_tx, uvs).r;
if (mask < 0.001f) {
discard;
gpu_discard_fragment();
return;
}

View File

@@ -18,7 +18,7 @@ void main()
int2 uvs_clamped = int2(uv_screen);
float depth = texelFetch(depth_tx, uvs_clamped, 0).r;
if (depth == 1.0f) {
discard;
gpu_discard_fragment();
return;
}

View File

@@ -14,7 +14,7 @@ void main()
* but needed for view clarity in X-ray mode and support
* for inverted bone matrices. */
if ((inverted == 1) == gl_FrontFacing) {
discard;
gpu_discard_fragment();
return;
}
frag_color = float4(final_color.rgb, alpha);

View File

@@ -29,7 +29,7 @@ void main()
#ifndef SELECT_ENABLE
/* We cannot discard the fragment in selection mode. Otherwise we would break pipeline
* correctness (no discard if early depth test enforced). */
discard;
gpu_discard_fragment();
#endif
return;
}

View File

@@ -15,7 +15,7 @@ void main()
float4 inner_color = float4(float3(0.0f), 1.0f);
float4 outer_color = float4(0.0f);
float2 dd = fwidth(stipple_pos);
float2 dd = gpu_fwidth(stipple_pos);
float line_distance = distance(stipple_pos, stipple_start) / max(dd.x, dd.y);
if (OVERLAY_UVLineStyle(line_style) == OVERLAY_UV_LINE_STYLE_OUTLINE) {

View File

@@ -33,7 +33,7 @@ void main()
* This is because we force the early depth test to only output the front most fragment.
* Discarding would expose us to race condition depending on rasterization order. */
if (fract(dist / dash_width) > dash_factor) {
discard;
gpu_discard_fragment();
}
#endif

View File

@@ -43,8 +43,8 @@ float3 get_axes(float3 co, float3 fwidthCos, float line_size)
void main()
{
float3 P = local_pos * grid_buf.size.xyz;
float3 dFdxPos = dFdx(P);
float3 dFdyPos = dFdy(P);
float3 dFdxPos = gpu_dfdx(P);
float3 dFdyPos = gpu_dfdy(P);
float3 fwidthPos = abs(dFdxPos) + abs(dFdyPos);
P += drw_view_position() * plane_axes;
@@ -210,7 +210,7 @@ void main()
* (avoids popping visuals due to depth buffer precision) */
/* Harder settings tend to flicker more,
* but have less "see through" appearance. */
float bias = max(fwidth(gl_FragCoord.z), 2.4e-7f);
float bias = max(gpu_fwidth(gl_FragCoord.z), 2.4e-7f);
fade *= linearstep(grid_depth, grid_depth + bias, scene_depth);
}

View File

@@ -21,7 +21,7 @@ void main()
/* Arbitrary discard anything below 5% opacity.
* Note that this could be exposed to the User. */
if (tex_color.a < 0.05f) {
discard;
gpu_discard_fragment();
}
else {
frag_color.a = 1.0f;

View File

@@ -265,7 +265,7 @@ void main()
/* NOTE(Metal): Discards are not explicit returns in Metal. We should also return to avoid
* erroneous derivatives which can manifest during texture sampling in
* non-uniform-control-flow. */
discard;
gpu_discard_fragment();
return;
}

View File

@@ -25,7 +25,7 @@ void main()
gp_interp_noperspective.thickness.x,
gp_interp_noperspective.hardness) < 0.001f)
{
discard;
gpu_discard_fragment();
return;
}

View File

@@ -90,7 +90,7 @@ void main()
/* Contour display */
if (draw_contours) {
/* This must be executed uniformly for all fragments */
float weight_gradient = length(float2(dFdx(weight), dFdy(weight)));
float weight_gradient = length(float2(gpu_dfdx(weight), gpu_dfdy(weight)));
float4 grid = contour_grid(weight, weight_gradient);

View File

@@ -15,7 +15,7 @@ void main()
float dist = length(uv);
if (dist > 0.5f) {
discard;
gpu_discard_fragment();
return;
}
/* Nice sphere falloff. */

View File

@@ -16,7 +16,7 @@ void main()
/* Round point with jagged edges. */
if (dist_squared > rad_squared) {
discard;
gpu_discard_fragment();
return;
}

View File

@@ -16,7 +16,7 @@ void main()
/* Needed only because of wireframe slider.
* If we could get rid of it would be nice because of performance drain of discard. */
if (edge_start.r == -1.0f) {
discard;
gpu_discard_fragment();
return;
}
#endif

View File

@@ -18,7 +18,7 @@ void main()
return;
}
discard;
gpu_discard_fragment();
return;
}
@@ -34,6 +34,6 @@ void main()
return;
}
discard;
gpu_discard_fragment();
return;
}

View File

@@ -23,7 +23,7 @@ void main()
float depth = texture(depth_tx, uv).r;
if (depth == 1.0f) {
/* Skip the background. */
discard;
gpu_discard_fragment();
return;
}

View File

@@ -16,7 +16,7 @@ void main()
out_edges = SMAALumaEdgeDetectionPS(uvs, offset, color_tx);
/* Discard if there is no edge. */
if (dot(out_edges, float2(1.0f, 1.0f)) == 0.0f) {
discard;
gpu_discard_fragment();
return;
}

View File

@@ -59,7 +59,7 @@ float3 workbench_image_color(float2 uvs)
# ifdef GPU_FRAGMENT_SHADER
if (color.a < image_transparency_cutoff) {
discard;
gpu_discard_fragment();
}
# endif

View File

@@ -240,7 +240,7 @@ void main()
constexpr uint in_front_stencil_bits = 1u << 1;
if (do_depth_test && (stencil & in_front_stencil_bits) != 0) {
/* Don't draw on top of "in front" objects. */
discard;
gpu_discard_fragment();
return;
}
@@ -255,7 +255,7 @@ void main()
* Adding a return call eliminates undefined behavior and a later out-of-bounds read causing
* a crash on AMD platforms.
* This behavior can also affect OpenGL on certain devices. */
discard;
gpu_discard_fragment();
return;
}
@@ -305,7 +305,7 @@ void main()
if (dot(ls_ray_dir, ls_vol_isect) < 0.0f) {
/* Start is further away than the end.
* That means no volume is intersected. */
discard;
gpu_discard_fragment();
return;
}

View File

@@ -1016,18 +1016,13 @@ bool GLShader::do_geometry_shader_injection(const shader::ShaderCreateInfo *info
/** \name Shader stage creation
* \{ */
static StringRefNull glsl_patch_default_get()
static StringRefNull glsl_patch_vertex_get()
{
/** Used for shader patching. Init once. */
static std::string patch = []() {
std::stringstream ss;
/* Version need to go first. */
if (epoxy_gl_version() >= 43) {
ss << "#version 430\n";
}
else {
ss << "#version 330\n";
}
ss << "#version 430\n";
/* Enable extensions for features that are not part of our base GLSL version
* don't use an extension for something already available! */
@@ -1042,13 +1037,6 @@ static StringRefNull glsl_patch_default_get()
if (GLContext::native_barycentric_support) {
ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
}
if (GLContext::framebuffer_fetch_support) {
ss << "#extension GL_EXT_shader_framebuffer_fetch: enable\n";
}
if (GPU_stencil_export_support()) {
ss << "#extension GL_ARB_shader_stencil_export: enable\n";
ss << "#define GPU_ARB_shader_stencil_export\n";
}
/* Fallbacks. */
if (!GLContext::shader_draw_parameters_support) {
@@ -1057,11 +1045,75 @@ static StringRefNull glsl_patch_default_get()
/* Vulkan GLSL compatibility. */
ss << "#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)\n";
ss << "#define gpu_EmitVertex EmitVertex\n";
/* Array compatibility. */
ss << "#define gpu_Array(_type) _type[]\n";
/* Needs to have this defined upfront for configuring shader defines. */
ss << "#define GPU_VERTEX_SHADER\n";
/* GLSL Backend Lib. */
ss << datatoc_glsl_shader_defines_glsl;
return ss.str();
}();
return patch;
}
static StringRefNull glsl_patch_geometry_get()
{
  /* Geometry-stage patch header. Assembled once on first use and cached for the
   * lifetime of the process. */
  static const std::string patch = []() {
    std::string header;
    /* The `#version` directive must be the very first line of the source. */
    header += "#version 430\n";
    /* Extensions for features that are not part of the base GLSL version. */
    if (GLContext::layered_rendering_support) {
      header += "#extension GL_ARB_shader_viewport_layer_array: enable\n";
    }
    if (GLContext::native_barycentric_support) {
      header += "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
    }
    /* Array compatibility. */
    header += "#define gpu_Array(_type) _type[]\n";
    /* The stage define must be known before the shared defines are parsed. */
    header += "#define GPU_GEOMETRY_SHADER\n";
    /* GLSL Backend Lib. */
    header += datatoc_glsl_shader_defines_glsl;
    return header;
  }();
  return patch;
}
static StringRefNull glsl_patch_fragment_get()
{
/** Used for shader patching. Init once. */
static std::string patch = []() {
std::stringstream ss;
/* Version need to go first. */
ss << "#version 430\n";
if (GLContext::layered_rendering_support) {
ss << "#extension GL_ARB_shader_viewport_layer_array: enable\n";
}
if (GLContext::native_barycentric_support) {
ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
}
if (GLContext::framebuffer_fetch_support) {
ss << "#extension GL_EXT_shader_framebuffer_fetch: enable\n";
}
if (GPU_stencil_export_support()) {
ss << "#extension GL_ARB_shader_stencil_export: enable\n";
ss << "#define GPU_ARB_shader_stencil_export\n";
}
/* Array compatibility. */
ss << "#define gpu_Array(_type) _type[]\n";
/* Needs to have this defined upfront for configuring shader defines. */
ss << "#define GPU_FRAGMENT_SHADER\n";
/* GLSL Backend Lib. */
ss << datatoc_glsl_shader_defines_glsl;
@@ -1077,11 +1129,13 @@ static StringRefNull glsl_patch_compute_get()
std::stringstream ss;
/* Version need to go first. */
ss << "#version 430\n";
ss << "#extension GL_ARB_compute_shader :enable\n";
/* Array compatibility. */
ss << "#define gpu_Array(_type) _type[]\n";
/* Needs to have this defined upfront for configuring shader defines. */
ss << "#define GPU_COMPUTE_SHADER\n";
ss << datatoc_glsl_shader_defines_glsl;
return ss.str();
@@ -1091,10 +1145,20 @@ static StringRefNull glsl_patch_compute_get()
StringRefNull GLShader::glsl_patch_get(GLenum gl_stage)
{
  /* Select the stage-specific GLSL patch header that gets prepended to the shader source.
   * NOTE: the stale fallback to the removed `glsl_patch_default_get()` is dropped; every
   * supported stage is handled explicitly below. */
  if (gl_stage == GL_VERTEX_SHADER) {
    return glsl_patch_vertex_get();
  }
  if (gl_stage == GL_GEOMETRY_SHADER) {
    return glsl_patch_geometry_get();
  }
  if (gl_stage == GL_FRAGMENT_SHADER) {
    return glsl_patch_fragment_get();
  }
  if (gl_stage == GL_COMPUTE_SHADER) {
    return glsl_patch_compute_get();
  }
  /* Unsupported stage: programming error. */
  BLI_assert_unreachable();
  return "";
}
GLuint GLShader::create_shader_stage(GLenum gl_stage,

View File

@@ -735,7 +735,7 @@ float2 SMAALumaEdgeDetectionPS(float2 texcoord,
# ifndef SMAA_NO_DISCARD
// Then discard if there is no edge:
if (dot(edges, float2(1.0f, 1.0f)) == 0.0f) {
discard;
gpu_discard_fragment();
return float2(0.0f, 0.0f);
}
# endif
@@ -810,7 +810,7 @@ float2 SMAAColorEdgeDetectionPS(float2 texcoord,
# ifdef GPU_FRAGMENT_SHADER
// Then discard if there is no edge:
if (dot(edges, float2(1.0f, 1.0f)) == 0.0f) {
discard;
gpu_discard_fragment();
return float2(0.0f, 0.0f);
}
# endif
@@ -858,7 +858,7 @@ float2 SMAADepthEdgeDetectionPS(float2 texcoord, float4 offset[3], SMAATexture2D
# ifdef GPU_FRAGMENT_SHADER
if (dot(edges, float2(1.0f, 1.0f)) == 0.0f) {
discard;
gpu_discard_fragment();
return float2(0.0f, 0.0f);
}
# endif

View File

@@ -835,12 +835,13 @@ uint floatBitsToUint(float) RET;
float intBitsToFloat(int) RET;
float uintBitsToFloat(uint) RET;
namespace gl_FragmentShader {
/* Derivative functions. */
template<typename T> T dFdx(T) RET;
template<typename T> T dFdy(T) RET;
template<typename T> T fwidth(T) RET;
} // namespace gl_FragmentShader
template<typename T> T gpu_dfdx(T) RET;
template<typename T> T gpu_dfdy(T) RET;
template<typename T> T gpu_fwidth(T) RET;
/* Discards the output of the current fragment shader invocation and halts its execution. */
void gpu_discard_fragment() {}
/* Geometric functions. */
template<typename T, int D> VecBase<T, D> faceforward(VecOp<T, D>, VecOp<T, D>, VecOp<T, D>) RET;
@@ -944,9 +945,6 @@ extern const uint gl_LocalInvocationIndex;
/* Pass argument by copy (default). */
#define in
/* Discards the output of the current fragment shader invocation and halts its execution. */
#define discard
/* Decorate a variable in global scope that is common to all threads in a thread-group. */
#define shared

View File

@@ -30,7 +30,7 @@ void main()
fragColor = color2;
}
else {
discard;
gpu_discard_fragment();
}
}
}

View File

@@ -16,7 +16,7 @@ float3 compute_masks(float2 uv)
/* Correct aspect ratio for 2D views not using uniform scaling.
* uv is already in pixel space so a uniform scale should give us a ratio of 1. */
float ratio = (butCo != -2.0f) ? abs(dFdy(uv.y) / dFdx(uv.x)) : 1.0f;
float ratio = (butCo != -2.0f) ? abs(gpu_dfdy(uv.y) / gpu_dfdx(uv.x)) : 1.0f;
float2 uv_sdf = uv;
uv_sdf.x *= ratio;
@@ -43,7 +43,7 @@ float3 compute_masks(float2 uv)
/* Clamp line width to be at least 1px wide. This can happen if the projection matrix
* has been scaled (i.e: Node editor)... */
float line_width = (lineWidth > 0.0f) ? max(fwidth(uv.y), lineWidth) : 0.0f;
float line_width = (lineWidth > 0.0f) ? max(gpu_fwidth(uv.y), lineWidth) : 0.0f;
constexpr float aa_radius = 0.5f;
float3 masks;
@@ -73,7 +73,7 @@ float4 do_checkerboard()
void main()
{
if (min(1.0f, -butCo) > discardFac) {
discard;
gpu_discard_fragment();
}
float3 masks = compute_masks(uvInterp);

View File

@@ -12,7 +12,7 @@ void main()
{
#ifdef CLIP
if (clip < 0.0f) {
discard;
gpu_discard_fragment();
}
#endif
fragColor = final_color;

View File

@@ -339,8 +339,8 @@ void dF_branch(float fn, out float2 result)
{
/* NOTE: this function is currently unused, once it is used we need to check if
* `g_derivative_filter_width` needs to be applied. */
result.x = dFdx(fn);
result.y = dFdy(fn);
result.x = gpu_dfdx(fn);
result.y = gpu_dfdy(fn);
}
#else
@@ -353,10 +353,10 @@ int g_derivative_flag = 0;
float3 dF_impl(float3 v)
{
  /* Screen-space derivative of `v` scaled by the filter width.
   * `g_derivative_flag` selects the direction: > 0 for X, < 0 for Y, 0 disables.
   * NOTE: the duplicated (unreachable) `dFdx`/`dFdy` return statements left over from the
   * rename to the stage-agnostic `gpu_dfdx`/`gpu_dfdy` wrappers are removed. */
  if (g_derivative_flag > 0) {
    return gpu_dfdx(v) * g_derivative_filter_width;
  }
  else if (g_derivative_flag < 0) {
    return gpu_dfdy(v) * g_derivative_filter_width;
  }
  return float3(0.0f);
}

View File

@@ -15,7 +15,7 @@ void main()
tColor.a = tColor.a * -1.0f;
float dist = length(interp.mTexCoord - center);
if (dist > 0.25f) {
discard;
gpu_discard_fragment();
}
}
/* Solid */

View File

@@ -37,7 +37,7 @@ void main()
if (stereo_display_mode == S3D_DISPLAY_INTERLACE && (interlace(texel) == stereo_interlace_swap))
{
discard;
gpu_discard_fragment();
}
imageColor = texelFetch(imageTexture, texel, 0);

View File

@@ -24,6 +24,6 @@ void main()
fragColor.a = mix(color.a, 0.0f, smoothstep(radii[1], radii[0], dist));
if (fragColor.a == 0.0f) {
discard;
gpu_discard_fragment();
}
}

View File

@@ -33,6 +33,6 @@ void main()
}
if (fragColor.a == 0.0f) {
discard;
gpu_discard_fragment();
}
}

View File

@@ -14,7 +14,7 @@ void main()
/* Round point with jagged edges. */
if (dist_squared > rad_squared) {
discard;
gpu_discard_fragment();
}
fragColor = finalColor;

View File

@@ -28,8 +28,8 @@ void node_bump(float strength,
dist *= FrontFacing ? invert : -invert;
#ifdef GPU_FRAGMENT_SHADER
float3 dPdx = dFdx(g_data.P);
float3 dPdy = dFdy(g_data.P);
float3 dPdx = gpu_dfdx(g_data.P);
float3 dPdy = gpu_dfdy(g_data.P);
/* Get surface tangents from normal. */
float3 Rx = cross(dPdy, N);

View File

@@ -57,8 +57,8 @@ void point_map_to_tube(float3 vin, out float3 vout)
void node_tex_image_linear(float3 co, sampler2D ima, out float4 color, out float alpha)
{
#ifdef GPU_FRAGMENT_SHADER
float2 dx = dFdx(co.xy) * texture_lod_bias_get();
float2 dy = dFdy(co.xy) * texture_lod_bias_get();
float2 dx = gpu_dfdx(co.xy) * texture_lod_bias_get();
float2 dy = gpu_dfdy(co.xy) * texture_lod_bias_get();
color = safe_color(textureGrad(ima, co.xy, dx, dy));
#else

View File

@@ -20,8 +20,8 @@ void node_wireframe_screenspace(float size, out float fac)
#ifdef GPU_FRAGMENT_SHADER
size *= (1.0f / 3.0f);
float3 dx = dFdx(barys);
float3 dy = dFdy(barys);
float3 dx = gpu_dfdx(barys);
float3 dy = gpu_dfdy(barys);
float3 deltas = sqrt(dx * dx + dy * dy);
float3 s = step(-deltas * size, -barys);

View File

@@ -1165,3 +1165,11 @@ RESHAPE(float3x3, float3x4, m[0].xyz, m[1].xyz, m[2].xyz)
#define _enum_end \
} \
;
/* Stage-agnostic builtin functions.
 * MSL allows mixing shader stages inside the same source file, so these simply alias the
 * native builtins for every stage. Calling them from a non-fragment stage is still invalid
 * and yields a compile error on Metal, which makes sure we catch invalid usage during CI
 * testing. */
#define gpu_discard_fragment() discard
#define gpu_dfdx(x) dFdx(x)
#define gpu_dfdy(x) dFdy(x)
#define gpu_fwidth(x) fwidth(x)

View File

@@ -160,3 +160,18 @@ RESHAPE(float3x3, mat3x3, mat3x4)
#define _enum_type(name) uint
#define _enum_decl(name) constexpr uint
#define _enum_end _enum_dummy;
/* Stage-agnostic builtin functions.
 * GLSL doesn't allow mixing shader stages inside the same source file.
 * Make sure builtin functions are stubbed when used in an invalid stage. */
#ifdef GPU_FRAGMENT_SHADER
# define gpu_discard_fragment() discard
# define gpu_dfdx(x) dFdx(x)
# define gpu_dfdy(x) dFdy(x)
# define gpu_fwidth(x) fwidth(x)
#else
/* NOTE(review): outside the fragment stage the derivative stubs pass the value through
 * unchanged (not zero) — presumably only meant to keep shared code compiling, since a
 * true screen-space derivative is undefined here. Confirm callers never rely on the
 * stubbed value. */
# define gpu_discard_fragment()
# define gpu_dfdx(x) x
# define gpu_dfdy(x) x
# define gpu_fwidth(x) x
#endif

View File

@@ -73,7 +73,10 @@ void VKDevice::deinit()
vk_queue_family_ = 0;
vk_queue_ = VK_NULL_HANDLE;
vk_physical_device_properties_ = {};
glsl_patch_.clear();
glsl_vert_patch_.clear();
glsl_frag_patch_.clear();
glsl_geom_patch_.clear();
glsl_comp_patch_.clear();
lifetime = Lifetime::DESTROYED;
}
@@ -304,14 +307,35 @@ void VKDevice::init_glsl_patch()
}
/* GLSL Backend Lib. */
ss << datatoc_glsl_shader_defines_glsl;
glsl_patch_ = ss.str();
glsl_vert_patch_ = ss.str() + "#define GPU_VERTEX_SHADER" + datatoc_glsl_shader_defines_glsl;
glsl_geom_patch_ = ss.str() + "#define GPU_GEOMETRY_SHADER" + datatoc_glsl_shader_defines_glsl;
glsl_frag_patch_ = ss.str() + "#define GPU_FRAGMENT_SHADER" + datatoc_glsl_shader_defines_glsl;
glsl_comp_patch_ = ss.str() + "#define GPU_COMPUTE_SHADER" + datatoc_glsl_shader_defines_glsl;
}
const char *VKDevice::glsl_patch_get() const
const char *VKDevice::glsl_vertex_patch_get() const
{
BLI_assert(!glsl_patch_.empty());
return glsl_patch_.c_str();
BLI_assert(!glsl_vert_patch_.empty());
return glsl_vert_patch_.c_str();
}
/* Geometry-stage GLSL patch; only valid after `init_glsl_patch()` filled it. */
const char *VKDevice::glsl_geometry_patch_get() const
{
  BLI_assert(!glsl_geom_patch_.empty());
  return glsl_geom_patch_.c_str();
}
/* Fragment-stage GLSL patch; only valid after `init_glsl_patch()` filled it. */
const char *VKDevice::glsl_fragment_patch_get() const
{
  BLI_assert(!glsl_frag_patch_.empty());
  return glsl_frag_patch_.c_str();
}
/* Compute-stage GLSL patch; only valid after `init_glsl_patch()` filled it. */
const char *VKDevice::glsl_compute_patch_get() const
{
  BLI_assert(!glsl_comp_patch_.empty());
  return glsl_comp_patch_.c_str();
}
/* -------------------------------------------------------------------- */

View File

@@ -208,7 +208,10 @@ class VKDevice : public NonCopyable {
VKWorkarounds workarounds_;
VKExtensions extensions_;
std::string glsl_patch_;
std::string glsl_vert_patch_;
std::string glsl_geom_patch_;
std::string glsl_frag_patch_;
std::string glsl_comp_patch_;
Vector<VKThreadData *> thread_data_;
public:
@@ -361,7 +364,10 @@ class VKDevice : public NonCopyable {
return extensions_;
}
const char *glsl_patch_get() const;
const char *glsl_vertex_patch_get() const;
const char *glsl_geometry_patch_get() const;
const char *glsl_fragment_patch_get() const;
const char *glsl_compute_patch_get() const;
void init_glsl_patch();
/* -------------------------------------------------------------------- */

View File

@@ -533,15 +533,29 @@ void VKShader::build_shader_module(MutableSpan<StringRefNull> sources,
shaderc_shader_kind stage,
VKShaderModule &r_shader_module)
{
BLI_assert_msg(ELEM(stage,
shaderc_vertex_shader,
shaderc_geometry_shader,
shaderc_fragment_shader,
shaderc_compute_shader),
"Only forced ShaderC shader kinds are supported.");
r_shader_module.is_ready = false;
const VKDevice &device = VKBackend::get().device;
sources[SOURCES_INDEX_VERSION] = device.glsl_patch_get();
const char *source_patch = nullptr;
switch (stage) {
case shaderc_vertex_shader:
source_patch = device.glsl_vertex_patch_get();
break;
case shaderc_geometry_shader:
source_patch = device.glsl_geometry_patch_get();
break;
case shaderc_fragment_shader:
source_patch = device.glsl_fragment_patch_get();
break;
case shaderc_compute_shader:
source_patch = device.glsl_compute_patch_get();
break;
default:
BLI_assert_msg(0, "Only forced ShaderC shader kinds are supported.");
break;
}
sources[SOURCES_INDEX_VERSION] = source_patch;
r_shader_module.combined_sources = combine_sources(sources);
if (!use_batch_compilation_) {
VKShaderCompiler::compile_module(*this, stage, r_shader_module);