GPU: Shader: Add wrapper to stage agnostic function
This avoids having to guard functions that are only available in the fragment shader stage. Calling the function inside another stage is still invalid and will yield a compile error on Metal. The Vulkan and OpenGL GLSL patches need to be modified per stage to allow the fragment-specific functions to be defined. This is not yet widely used, but a good example is the change in `film_display_depth_amend`. Rel #137261 Pull Request: https://projects.blender.org/blender/blender/pulls/138280
This commit is contained in:
committed by
Clément Foucault
parent
92ac9f3b25
commit
8dee08996e
@@ -23,7 +23,7 @@ void main()
|
||||
GBufferReader gbuf = gbuffer_read(gbuf_header_tx, gbuf_closure_tx, gbuf_normal_tx, texel);
|
||||
|
||||
if (gbuf.closure_count == 0) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -42,7 +42,7 @@ void main()
|
||||
|
||||
/* Display surfels as circles. */
|
||||
if (distance(P, surfel.position) > debug_surfel_radius) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -28,7 +28,7 @@ void main()
|
||||
* Discard fragments that do not have a number of closure whose bit-pattern
|
||||
* overlap the current stencil un-masked bit. */
|
||||
if ((current_bit & (closure_count | has_transmission)) == 0) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -44,7 +44,7 @@ void main()
|
||||
shapes = saturate(1.0f - linearstep(-0.8f, 0.8f, shapes));
|
||||
/* Outside of bokeh shape. Try to avoid overloading ROPs. */
|
||||
if (reduce_max(shapes) == 0.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ void main()
|
||||
|
||||
/* Discard outside the circle. */
|
||||
if (dist_sqr > 1.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ void main()
|
||||
|
||||
/* Discard outside the circle. */
|
||||
if (dist_sqr > 1.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -48,6 +48,6 @@ void main()
|
||||
|
||||
float out_depth = imageLoadFast(depth_img, texel).r;
|
||||
out_depth = drw_depth_view_to_screen(-out_depth);
|
||||
out_depth += 2.4e-7f * 4.0f + fwidth(out_depth);
|
||||
out_depth += 2.4e-7f * 4.0f + gpu_fwidth(out_depth);
|
||||
gl_FragDepth = saturate(out_depth);
|
||||
}
|
||||
|
||||
@@ -630,9 +630,7 @@ float film_display_depth_amend(int2 texel, float depth)
|
||||
* twice. One for X and one for Y direction. */
|
||||
/* TODO(fclem): This could be improved as it gives flickering result at depth discontinuity.
|
||||
* But this is the quickest stable result I could come with for now. */
|
||||
#ifdef GPU_FRAGMENT_SHADER
|
||||
depth += fwidth(depth);
|
||||
#endif
|
||||
depth += gpu_fwidth(depth);
|
||||
/* Small offset to avoid depth test lessEqual failing because of all the conversions loss. */
|
||||
depth += 2.4e-7f * 4.0f;
|
||||
return saturate(depth);
|
||||
|
||||
@@ -13,7 +13,7 @@ void main()
|
||||
|
||||
float distance_from_center = distance(uv_coord.xy, float2(0.5f));
|
||||
if (distance_from_center > 0.5f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
float smooth_size = texel_size.x * 1.5f;
|
||||
|
||||
@@ -621,7 +621,7 @@ float2 bsdf_lut(float cos_theta, float roughness, float ior, bool do_multiscatte
|
||||
/* Return new shading normal. */
|
||||
float3 displacement_bump()
|
||||
{
|
||||
# if defined(GPU_FRAGMENT_SHADER) && !defined(MAT_GEOM_CURVES)
|
||||
# if !defined(MAT_GEOM_CURVES)
|
||||
/* This is the filter width for automatic displacement + bump mapping, which is fixed.
|
||||
* NOTE: keep the same as default bump node filter width. */
|
||||
constexpr float bump_filter_width = 0.1f;
|
||||
@@ -629,8 +629,8 @@ float3 displacement_bump()
|
||||
float2 dHd;
|
||||
dF_branch(dot(nodetree_displacement(), g_data.N + dF_impl(g_data.N)), bump_filter_width, dHd);
|
||||
|
||||
float3 dPdx = dFdx(g_data.P);
|
||||
float3 dPdy = dFdy(g_data.P);
|
||||
float3 dPdx = gpu_dfdx(g_data.P);
|
||||
float3 dPdy = gpu_dfdy(g_data.P);
|
||||
|
||||
/* Get surface tangents from normal. */
|
||||
float3 Rx = cross(dPdy, g_data.N);
|
||||
|
||||
@@ -50,7 +50,7 @@ void main()
|
||||
|
||||
float transparency = average(g_transmittance);
|
||||
if (transparency > threshold) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
@@ -60,7 +60,7 @@ void main()
|
||||
* This would in turn create a discrepancy between the pre-pass depth and the G-buffer depth
|
||||
* which exhibits missing pixels data. */
|
||||
if (clip_interp.clip_distance > 0.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -18,8 +18,8 @@ SHADER_LIBRARY_CREATE_INFO(eevee_geom_mesh)
|
||||
#if defined(USE_BARYCENTRICS) && defined(GPU_FRAGMENT_SHADER) && defined(MAT_GEOM_MESH)
|
||||
float3 barycentric_distances_get()
|
||||
{
|
||||
float wp_delta = length(dFdx(interp.P)) + length(dFdy(interp.P));
|
||||
float bc_delta = length(dFdx(gpu_BaryCoord)) + length(dFdy(gpu_BaryCoord));
|
||||
float wp_delta = length(gpu_dfdx(interp.P)) + length(gpu_dfdy(interp.P));
|
||||
float bc_delta = length(gpu_dfdx(gpu_BaryCoord)) + length(gpu_dfdy(gpu_BaryCoord));
|
||||
float rate_of_change = wp_delta / bc_delta;
|
||||
return rate_of_change * (1.0f - gpu_BaryCoord);
|
||||
}
|
||||
@@ -107,7 +107,7 @@ void init_globals()
|
||||
#ifdef GPU_FRAGMENT_SHADER
|
||||
g_data.N = (FrontFacing) ? g_data.N : -g_data.N;
|
||||
g_data.Ni = (FrontFacing) ? g_data.Ni : -g_data.Ni;
|
||||
g_data.Ng = safe_normalize(cross(dFdx(g_data.P), dFdy(g_data.P)));
|
||||
g_data.Ng = safe_normalize(cross(gpu_dfdx(g_data.P), gpu_dfdy(g_data.P)));
|
||||
#endif
|
||||
|
||||
#if defined(MAT_GEOM_MESH)
|
||||
|
||||
@@ -43,7 +43,7 @@ void main()
|
||||
ndc_depth = 1.0f;
|
||||
#else
|
||||
# define discard_result \
|
||||
discard; \
|
||||
gpu_discard_fragment(); \
|
||||
return;
|
||||
#endif
|
||||
|
||||
|
||||
@@ -20,7 +20,7 @@ float transparency_hash_3d(float3 a)
|
||||
float transparency_hashed_alpha_threshold(float hash_scale, float hash_offset, float3 P)
|
||||
{
|
||||
/* Find the discretized derivatives of our coordinates. */
|
||||
float max_deriv = max(length(dFdx(P)), length(dFdy(P)));
|
||||
float max_deriv = max(length(gpu_dfdx(P)), length(gpu_dfdy(P)));
|
||||
float pix_scale = 1.0f / (hash_scale * max_deriv);
|
||||
/* Find two nearest log-discretized noise scales. */
|
||||
float pix_scale_log = log2(pix_scale);
|
||||
|
||||
@@ -16,7 +16,7 @@ void main()
|
||||
out_edges = max(out_edges, SMAALumaEdgeDetectionPS(uvs, offset, reveal_tx));
|
||||
/* Discard if there is no edge. */
|
||||
if (dot(out_edges, float2(1.0f, 1.0f)) == 0.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -102,7 +102,7 @@ void main()
|
||||
revealColor = float4(0.0f, 0.0f, 0.0f, frag_color.a);
|
||||
|
||||
if (frag_color.a < 0.001f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -113,7 +113,7 @@ void main()
|
||||
/* Manual depth test */
|
||||
float scene_depth = texture(gp_scene_depth_tx, uvs).r;
|
||||
if (gl_FragCoord.z > scene_depth) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -121,7 +121,7 @@ void main()
|
||||
* depth written where the mask obliterate the layer. */
|
||||
float mask = texture(gp_mask_tx, uvs).r;
|
||||
if (mask < 0.001f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -18,7 +18,7 @@ void main()
|
||||
int2 uvs_clamped = int2(uv_screen);
|
||||
float depth = texelFetch(depth_tx, uvs_clamped, 0).r;
|
||||
if (depth == 1.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -14,7 +14,7 @@ void main()
|
||||
* but needed for view clarity in X-ray mode and support
|
||||
* for inverted bone matrices. */
|
||||
if ((inverted == 1) == gl_FrontFacing) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
frag_color = float4(final_color.rgb, alpha);
|
||||
|
||||
@@ -29,7 +29,7 @@ void main()
|
||||
#ifndef SELECT_ENABLE
|
||||
/* We cannot discard the fragment in selection mode. Otherwise we would break pipeline
|
||||
* correctness (no discard if early depth test enforced). */
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ void main()
|
||||
float4 inner_color = float4(float3(0.0f), 1.0f);
|
||||
float4 outer_color = float4(0.0f);
|
||||
|
||||
float2 dd = fwidth(stipple_pos);
|
||||
float2 dd = gpu_fwidth(stipple_pos);
|
||||
float line_distance = distance(stipple_pos, stipple_start) / max(dd.x, dd.y);
|
||||
|
||||
if (OVERLAY_UVLineStyle(line_style) == OVERLAY_UV_LINE_STYLE_OUTLINE) {
|
||||
|
||||
@@ -33,7 +33,7 @@ void main()
|
||||
* This is because we force the early depth test to only output the front most fragment.
|
||||
* Discarding would expose us to race condition depending on rasterization order. */
|
||||
if (fract(dist / dash_width) > dash_factor) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -43,8 +43,8 @@ float3 get_axes(float3 co, float3 fwidthCos, float line_size)
|
||||
void main()
|
||||
{
|
||||
float3 P = local_pos * grid_buf.size.xyz;
|
||||
float3 dFdxPos = dFdx(P);
|
||||
float3 dFdyPos = dFdy(P);
|
||||
float3 dFdxPos = gpu_dfdx(P);
|
||||
float3 dFdyPos = gpu_dfdy(P);
|
||||
float3 fwidthPos = abs(dFdxPos) + abs(dFdyPos);
|
||||
P += drw_view_position() * plane_axes;
|
||||
|
||||
@@ -210,7 +210,7 @@ void main()
|
||||
* (avoids popping visuals due to depth buffer precision) */
|
||||
/* Harder settings tend to flicker more,
|
||||
* but have less "see through" appearance. */
|
||||
float bias = max(fwidth(gl_FragCoord.z), 2.4e-7f);
|
||||
float bias = max(gpu_fwidth(gl_FragCoord.z), 2.4e-7f);
|
||||
fade *= linearstep(grid_depth, grid_depth + bias, scene_depth);
|
||||
}
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@ void main()
|
||||
/* Arbitrary discard anything below 5% opacity.
|
||||
* Note that this could be exposed to the User. */
|
||||
if (tex_color.a < 0.05f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
else {
|
||||
frag_color.a = 1.0f;
|
||||
|
||||
@@ -265,7 +265,7 @@ void main()
|
||||
/* NOTE(Metal): Discards are not explicit returns in Metal. We should also return to avoid
|
||||
* erroneous derivatives which can manifest during texture sampling in
|
||||
* non-uniform-control-flow. */
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ void main()
|
||||
gp_interp_noperspective.thickness.x,
|
||||
gp_interp_noperspective.hardness) < 0.001f)
|
||||
{
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -90,7 +90,7 @@ void main()
|
||||
/* Contour display */
|
||||
if (draw_contours) {
|
||||
/* This must be executed uniformly for all fragments */
|
||||
float weight_gradient = length(float2(dFdx(weight), dFdy(weight)));
|
||||
float weight_gradient = length(float2(gpu_dfdx(weight), gpu_dfdy(weight)));
|
||||
|
||||
float4 grid = contour_grid(weight, weight_gradient);
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ void main()
|
||||
float dist = length(uv);
|
||||
|
||||
if (dist > 0.5f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
/* Nice sphere falloff. */
|
||||
|
||||
@@ -16,7 +16,7 @@ void main()
|
||||
|
||||
/* Round point with jagged edges. */
|
||||
if (dist_squared > rad_squared) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ void main()
|
||||
/* Needed only because of wireframe slider.
|
||||
* If we could get rid of it would be nice because of performance drain of discard. */
|
||||
if (edge_start.r == -1.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -18,7 +18,7 @@ void main()
|
||||
return;
|
||||
}
|
||||
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -34,6 +34,6 @@ void main()
|
||||
return;
|
||||
}
|
||||
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ void main()
|
||||
float depth = texture(depth_tx, uv).r;
|
||||
if (depth == 1.0f) {
|
||||
/* Skip the background. */
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -16,7 +16,7 @@ void main()
|
||||
out_edges = SMAALumaEdgeDetectionPS(uvs, offset, color_tx);
|
||||
/* Discard if there is no edge. */
|
||||
if (dot(out_edges, float2(1.0f, 1.0f)) == 0.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -59,7 +59,7 @@ float3 workbench_image_color(float2 uvs)
|
||||
|
||||
# ifdef GPU_FRAGMENT_SHADER
|
||||
if (color.a < image_transparency_cutoff) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
# endif
|
||||
|
||||
|
||||
@@ -240,7 +240,7 @@ void main()
|
||||
constexpr uint in_front_stencil_bits = 1u << 1;
|
||||
if (do_depth_test && (stencil & in_front_stencil_bits) != 0) {
|
||||
/* Don't draw on top of "in front" objects. */
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -255,7 +255,7 @@ void main()
|
||||
* Adding a return call eliminates undefined behavior and a later out-of-bounds read causing
|
||||
* a crash on AMD platforms.
|
||||
* This behavior can also affect OpenGL on certain devices. */
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -305,7 +305,7 @@ void main()
|
||||
if (dot(ls_ray_dir, ls_vol_isect) < 0.0f) {
|
||||
/* Start is further away than the end.
|
||||
* That means no volume is intersected. */
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@@ -1016,18 +1016,13 @@ bool GLShader::do_geometry_shader_injection(const shader::ShaderCreateInfo *info
|
||||
/** \name Shader stage creation
|
||||
* \{ */
|
||||
|
||||
static StringRefNull glsl_patch_default_get()
|
||||
static StringRefNull glsl_patch_vertex_get()
|
||||
{
|
||||
/** Used for shader patching. Init once. */
|
||||
static std::string patch = []() {
|
||||
std::stringstream ss;
|
||||
/* Version need to go first. */
|
||||
if (epoxy_gl_version() >= 43) {
|
||||
ss << "#version 430\n";
|
||||
}
|
||||
else {
|
||||
ss << "#version 330\n";
|
||||
}
|
||||
ss << "#version 430\n";
|
||||
|
||||
/* Enable extensions for features that are not part of our base GLSL version
|
||||
* don't use an extension for something already available! */
|
||||
@@ -1042,13 +1037,6 @@ static StringRefNull glsl_patch_default_get()
|
||||
if (GLContext::native_barycentric_support) {
|
||||
ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
|
||||
}
|
||||
if (GLContext::framebuffer_fetch_support) {
|
||||
ss << "#extension GL_EXT_shader_framebuffer_fetch: enable\n";
|
||||
}
|
||||
if (GPU_stencil_export_support()) {
|
||||
ss << "#extension GL_ARB_shader_stencil_export: enable\n";
|
||||
ss << "#define GPU_ARB_shader_stencil_export\n";
|
||||
}
|
||||
|
||||
/* Fallbacks. */
|
||||
if (!GLContext::shader_draw_parameters_support) {
|
||||
@@ -1057,11 +1045,75 @@ static StringRefNull glsl_patch_default_get()
|
||||
|
||||
/* Vulkan GLSL compatibility. */
|
||||
ss << "#define gpu_InstanceIndex (gl_InstanceID + gpu_BaseInstance)\n";
|
||||
ss << "#define gpu_EmitVertex EmitVertex\n";
|
||||
|
||||
/* Array compatibility. */
|
||||
ss << "#define gpu_Array(_type) _type[]\n";
|
||||
|
||||
/* Needs to have this defined upfront for configuring shader defines. */
|
||||
ss << "#define GPU_VERTEX_SHADER\n";
|
||||
/* GLSL Backend Lib. */
|
||||
ss << datatoc_glsl_shader_defines_glsl;
|
||||
|
||||
return ss.str();
|
||||
}();
|
||||
return patch;
|
||||
}
|
||||
|
||||
static StringRefNull glsl_patch_geometry_get()
|
||||
{
|
||||
/** Used for shader patching. Init once. */
|
||||
static std::string patch = []() {
|
||||
std::stringstream ss;
|
||||
/* Version need to go first. */
|
||||
ss << "#version 430\n";
|
||||
|
||||
if (GLContext::layered_rendering_support) {
|
||||
ss << "#extension GL_ARB_shader_viewport_layer_array: enable\n";
|
||||
}
|
||||
if (GLContext::native_barycentric_support) {
|
||||
ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
|
||||
}
|
||||
|
||||
/* Array compatibility. */
|
||||
ss << "#define gpu_Array(_type) _type[]\n";
|
||||
|
||||
/* Needs to have this defined upfront for configuring shader defines. */
|
||||
ss << "#define GPU_GEOMETRY_SHADER\n";
|
||||
/* GLSL Backend Lib. */
|
||||
ss << datatoc_glsl_shader_defines_glsl;
|
||||
|
||||
return ss.str();
|
||||
}();
|
||||
return patch;
|
||||
}
|
||||
|
||||
static StringRefNull glsl_patch_fragment_get()
|
||||
{
|
||||
/** Used for shader patching. Init once. */
|
||||
static std::string patch = []() {
|
||||
std::stringstream ss;
|
||||
/* Version need to go first. */
|
||||
ss << "#version 430\n";
|
||||
|
||||
if (GLContext::layered_rendering_support) {
|
||||
ss << "#extension GL_ARB_shader_viewport_layer_array: enable\n";
|
||||
}
|
||||
if (GLContext::native_barycentric_support) {
|
||||
ss << "#extension GL_AMD_shader_explicit_vertex_parameter: enable\n";
|
||||
}
|
||||
if (GLContext::framebuffer_fetch_support) {
|
||||
ss << "#extension GL_EXT_shader_framebuffer_fetch: enable\n";
|
||||
}
|
||||
if (GPU_stencil_export_support()) {
|
||||
ss << "#extension GL_ARB_shader_stencil_export: enable\n";
|
||||
ss << "#define GPU_ARB_shader_stencil_export\n";
|
||||
}
|
||||
|
||||
/* Array compatibility. */
|
||||
ss << "#define gpu_Array(_type) _type[]\n";
|
||||
|
||||
/* Needs to have this defined upfront for configuring shader defines. */
|
||||
ss << "#define GPU_FRAGMENT_SHADER\n";
|
||||
/* GLSL Backend Lib. */
|
||||
ss << datatoc_glsl_shader_defines_glsl;
|
||||
|
||||
@@ -1077,11 +1129,13 @@ static StringRefNull glsl_patch_compute_get()
|
||||
std::stringstream ss;
|
||||
/* Version need to go first. */
|
||||
ss << "#version 430\n";
|
||||
ss << "#extension GL_ARB_compute_shader :enable\n";
|
||||
|
||||
/* Array compatibility. */
|
||||
ss << "#define gpu_Array(_type) _type[]\n";
|
||||
|
||||
/* Needs to have this defined upfront for configuring shader defines. */
|
||||
ss << "#define GPU_COMPUTE_SHADER\n";
|
||||
|
||||
ss << datatoc_glsl_shader_defines_glsl;
|
||||
|
||||
return ss.str();
|
||||
@@ -1091,10 +1145,20 @@ static StringRefNull glsl_patch_compute_get()
|
||||
|
||||
StringRefNull GLShader::glsl_patch_get(GLenum gl_stage)
|
||||
{
|
||||
if (gl_stage == GL_VERTEX_SHADER) {
|
||||
return glsl_patch_vertex_get();
|
||||
}
|
||||
if (gl_stage == GL_GEOMETRY_SHADER) {
|
||||
return glsl_patch_geometry_get();
|
||||
}
|
||||
if (gl_stage == GL_FRAGMENT_SHADER) {
|
||||
return glsl_patch_fragment_get();
|
||||
}
|
||||
if (gl_stage == GL_COMPUTE_SHADER) {
|
||||
return glsl_patch_compute_get();
|
||||
}
|
||||
return glsl_patch_default_get();
|
||||
BLI_assert_unreachable();
|
||||
return "";
|
||||
}
|
||||
|
||||
GLuint GLShader::create_shader_stage(GLenum gl_stage,
|
||||
|
||||
@@ -735,7 +735,7 @@ float2 SMAALumaEdgeDetectionPS(float2 texcoord,
|
||||
# ifndef SMAA_NO_DISCARD
|
||||
// Then discard if there is no edge:
|
||||
if (dot(edges, float2(1.0f, 1.0f)) == 0.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return float2(0.0f, 0.0f);
|
||||
}
|
||||
# endif
|
||||
@@ -810,7 +810,7 @@ float2 SMAAColorEdgeDetectionPS(float2 texcoord,
|
||||
# ifdef GPU_FRAGMENT_SHADER
|
||||
// Then discard if there is no edge:
|
||||
if (dot(edges, float2(1.0f, 1.0f)) == 0.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return float2(0.0f, 0.0f);
|
||||
}
|
||||
# endif
|
||||
@@ -858,7 +858,7 @@ float2 SMAADepthEdgeDetectionPS(float2 texcoord, float4 offset[3], SMAATexture2D
|
||||
|
||||
# ifdef GPU_FRAGMENT_SHADER
|
||||
if (dot(edges, float2(1.0f, 1.0f)) == 0.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
return float2(0.0f, 0.0f);
|
||||
}
|
||||
# endif
|
||||
|
||||
@@ -835,12 +835,13 @@ uint floatBitsToUint(float) RET;
|
||||
float intBitsToFloat(int) RET;
|
||||
float uintBitsToFloat(uint) RET;
|
||||
|
||||
namespace gl_FragmentShader {
|
||||
/* Derivative functions. */
|
||||
template<typename T> T dFdx(T) RET;
|
||||
template<typename T> T dFdy(T) RET;
|
||||
template<typename T> T fwidth(T) RET;
|
||||
} // namespace gl_FragmentShader
|
||||
template<typename T> T gpu_dfdx(T) RET;
|
||||
template<typename T> T gpu_dfdy(T) RET;
|
||||
template<typename T> T gpu_fwidth(T) RET;
|
||||
|
||||
/* Discards the output of the current fragment shader invocation and halts its execution. */
|
||||
void gpu_discard_fragment() {}
|
||||
|
||||
/* Geometric functions. */
|
||||
template<typename T, int D> VecBase<T, D> faceforward(VecOp<T, D>, VecOp<T, D>, VecOp<T, D>) RET;
|
||||
@@ -944,9 +945,6 @@ extern const uint gl_LocalInvocationIndex;
|
||||
/* Pass argument by copy (default). */
|
||||
#define in
|
||||
|
||||
/* Discards the output of the current fragment shader invocation and halts its execution. */
|
||||
#define discard
|
||||
|
||||
/* Decorate a variable in global scope that is common to all threads in a thread-group. */
|
||||
#define shared
|
||||
|
||||
|
||||
@@ -30,7 +30,7 @@ void main()
|
||||
fragColor = color2;
|
||||
}
|
||||
else {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,7 +16,7 @@ float3 compute_masks(float2 uv)
|
||||
|
||||
/* Correct aspect ratio for 2D views not using uniform scaling.
|
||||
* uv is already in pixel space so a uniform scale should give us a ratio of 1. */
|
||||
float ratio = (butCo != -2.0f) ? abs(dFdy(uv.y) / dFdx(uv.x)) : 1.0f;
|
||||
float ratio = (butCo != -2.0f) ? abs(gpu_dfdy(uv.y) / gpu_dfdx(uv.x)) : 1.0f;
|
||||
float2 uv_sdf = uv;
|
||||
uv_sdf.x *= ratio;
|
||||
|
||||
@@ -43,7 +43,7 @@ float3 compute_masks(float2 uv)
|
||||
|
||||
/* Clamp line width to be at least 1px wide. This can happen if the projection matrix
|
||||
* has been scaled (i.e: Node editor)... */
|
||||
float line_width = (lineWidth > 0.0f) ? max(fwidth(uv.y), lineWidth) : 0.0f;
|
||||
float line_width = (lineWidth > 0.0f) ? max(gpu_fwidth(uv.y), lineWidth) : 0.0f;
|
||||
|
||||
constexpr float aa_radius = 0.5f;
|
||||
float3 masks;
|
||||
@@ -73,7 +73,7 @@ float4 do_checkerboard()
|
||||
void main()
|
||||
{
|
||||
if (min(1.0f, -butCo) > discardFac) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
|
||||
float3 masks = compute_masks(uvInterp);
|
||||
|
||||
@@ -12,7 +12,7 @@ void main()
|
||||
{
|
||||
#ifdef CLIP
|
||||
if (clip < 0.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
#endif
|
||||
fragColor = final_color;
|
||||
|
||||
@@ -339,8 +339,8 @@ void dF_branch(float fn, out float2 result)
|
||||
{
|
||||
/* NOTE: this function is currently unused, once it is used we need to check if
|
||||
* `g_derivative_filter_width` needs to be applied. */
|
||||
result.x = dFdx(fn);
|
||||
result.y = dFdy(fn);
|
||||
result.x = gpu_dfdx(fn);
|
||||
result.y = gpu_dfdy(fn);
|
||||
}
|
||||
|
||||
#else
|
||||
@@ -353,10 +353,10 @@ int g_derivative_flag = 0;
|
||||
float3 dF_impl(float3 v)
|
||||
{
|
||||
if (g_derivative_flag > 0) {
|
||||
return dFdx(v) * g_derivative_filter_width;
|
||||
return gpu_dfdx(v) * g_derivative_filter_width;
|
||||
}
|
||||
else if (g_derivative_flag < 0) {
|
||||
return dFdy(v) * g_derivative_filter_width;
|
||||
return gpu_dfdy(v) * g_derivative_filter_width;
|
||||
}
|
||||
return float3(0.0f);
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ void main()
|
||||
tColor.a = tColor.a * -1.0f;
|
||||
float dist = length(interp.mTexCoord - center);
|
||||
if (dist > 0.25f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
}
|
||||
/* Solid */
|
||||
|
||||
@@ -37,7 +37,7 @@ void main()
|
||||
|
||||
if (stereo_display_mode == S3D_DISPLAY_INTERLACE && (interlace(texel) == stereo_interlace_swap))
|
||||
{
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
|
||||
imageColor = texelFetch(imageTexture, texel, 0);
|
||||
|
||||
@@ -24,6 +24,6 @@ void main()
|
||||
fragColor.a = mix(color.a, 0.0f, smoothstep(radii[1], radii[0], dist));
|
||||
|
||||
if (fragColor.a == 0.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,6 +33,6 @@ void main()
|
||||
}
|
||||
|
||||
if (fragColor.a == 0.0f) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ void main()
|
||||
|
||||
/* Round point with jagged edges. */
|
||||
if (dist_squared > rad_squared) {
|
||||
discard;
|
||||
gpu_discard_fragment();
|
||||
}
|
||||
|
||||
fragColor = finalColor;
|
||||
|
||||
@@ -28,8 +28,8 @@ void node_bump(float strength,
|
||||
dist *= FrontFacing ? invert : -invert;
|
||||
|
||||
#ifdef GPU_FRAGMENT_SHADER
|
||||
float3 dPdx = dFdx(g_data.P);
|
||||
float3 dPdy = dFdy(g_data.P);
|
||||
float3 dPdx = gpu_dfdx(g_data.P);
|
||||
float3 dPdy = gpu_dfdy(g_data.P);
|
||||
|
||||
/* Get surface tangents from normal. */
|
||||
float3 Rx = cross(dPdy, N);
|
||||
|
||||
@@ -57,8 +57,8 @@ void point_map_to_tube(float3 vin, out float3 vout)
|
||||
void node_tex_image_linear(float3 co, sampler2D ima, out float4 color, out float alpha)
|
||||
{
|
||||
#ifdef GPU_FRAGMENT_SHADER
|
||||
float2 dx = dFdx(co.xy) * texture_lod_bias_get();
|
||||
float2 dy = dFdy(co.xy) * texture_lod_bias_get();
|
||||
float2 dx = gpu_dfdx(co.xy) * texture_lod_bias_get();
|
||||
float2 dy = gpu_dfdy(co.xy) * texture_lod_bias_get();
|
||||
|
||||
color = safe_color(textureGrad(ima, co.xy, dx, dy));
|
||||
#else
|
||||
|
||||
@@ -20,8 +20,8 @@ void node_wireframe_screenspace(float size, out float fac)
|
||||
|
||||
#ifdef GPU_FRAGMENT_SHADER
|
||||
size *= (1.0f / 3.0f);
|
||||
float3 dx = dFdx(barys);
|
||||
float3 dy = dFdy(barys);
|
||||
float3 dx = gpu_dfdx(barys);
|
||||
float3 dy = gpu_dfdy(barys);
|
||||
float3 deltas = sqrt(dx * dx + dy * dy);
|
||||
|
||||
float3 s = step(-deltas * size, -barys);
|
||||
|
||||
@@ -1165,3 +1165,11 @@ RESHAPE(float3x3, float3x4, m[0].xyz, m[1].xyz, m[2].xyz)
|
||||
#define _enum_end \
|
||||
} \
|
||||
;
|
||||
|
||||
/* Stage agnostic builtin function.
|
||||
* MSL allow mixing shader stages inside the same source file.
|
||||
* Leaving the calls untouched makes sure we catch invalid usage during CI testing. */
|
||||
#define gpu_discard_fragment() discard
|
||||
#define gpu_dfdx(x) dFdx(x)
|
||||
#define gpu_dfdy(x) dFdy(x)
|
||||
#define gpu_fwidth(x) fwidth(x)
|
||||
|
||||
@@ -160,3 +160,18 @@ RESHAPE(float3x3, mat3x3, mat3x4)
|
||||
#define _enum_type(name) uint
|
||||
#define _enum_decl(name) constexpr uint
|
||||
#define _enum_end _enum_dummy;
|
||||
|
||||
/* Stage agnostic builtin function.
|
||||
* GLSL doesn't allow mixing shader stages inside the same source file.
|
||||
* Make sure builtin functions are stubbed when used in an invalid stage. */
|
||||
#ifdef GPU_FRAGMENT_SHADER
|
||||
# define gpu_discard_fragment() discard
|
||||
# define gpu_dfdx(x) dFdx(x)
|
||||
# define gpu_dfdy(x) dFdy(x)
|
||||
# define gpu_fwidth(x) fwidth(x)
|
||||
#else
|
||||
# define gpu_discard_fragment()
|
||||
# define gpu_dfdx(x) x
|
||||
# define gpu_dfdy(x) x
|
||||
# define gpu_fwidth(x) x
|
||||
#endif
|
||||
|
||||
@@ -73,7 +73,10 @@ void VKDevice::deinit()
|
||||
vk_queue_family_ = 0;
|
||||
vk_queue_ = VK_NULL_HANDLE;
|
||||
vk_physical_device_properties_ = {};
|
||||
glsl_patch_.clear();
|
||||
glsl_vert_patch_.clear();
|
||||
glsl_frag_patch_.clear();
|
||||
glsl_geom_patch_.clear();
|
||||
glsl_comp_patch_.clear();
|
||||
lifetime = Lifetime::DESTROYED;
|
||||
}
|
||||
|
||||
@@ -304,14 +307,35 @@ void VKDevice::init_glsl_patch()
|
||||
}
|
||||
|
||||
/* GLSL Backend Lib. */
|
||||
ss << datatoc_glsl_shader_defines_glsl;
|
||||
glsl_patch_ = ss.str();
|
||||
|
||||
glsl_vert_patch_ = ss.str() + "#define GPU_VERTEX_SHADER" + datatoc_glsl_shader_defines_glsl;
|
||||
glsl_geom_patch_ = ss.str() + "#define GPU_GEOMETRY_SHADER" + datatoc_glsl_shader_defines_glsl;
|
||||
glsl_frag_patch_ = ss.str() + "#define GPU_FRAGMENT_SHADER" + datatoc_glsl_shader_defines_glsl;
|
||||
glsl_comp_patch_ = ss.str() + "#define GPU_COMPUTE_SHADER" + datatoc_glsl_shader_defines_glsl;
|
||||
}
|
||||
|
||||
const char *VKDevice::glsl_patch_get() const
|
||||
const char *VKDevice::glsl_vertex_patch_get() const
|
||||
{
|
||||
BLI_assert(!glsl_patch_.empty());
|
||||
return glsl_patch_.c_str();
|
||||
BLI_assert(!glsl_vert_patch_.empty());
|
||||
return glsl_vert_patch_.c_str();
|
||||
}
|
||||
|
||||
const char *VKDevice::glsl_geometry_patch_get() const
|
||||
{
|
||||
BLI_assert(!glsl_geom_patch_.empty());
|
||||
return glsl_geom_patch_.c_str();
|
||||
}
|
||||
|
||||
const char *VKDevice::glsl_fragment_patch_get() const
|
||||
{
|
||||
BLI_assert(!glsl_frag_patch_.empty());
|
||||
return glsl_frag_patch_.c_str();
|
||||
}
|
||||
|
||||
const char *VKDevice::glsl_compute_patch_get() const
|
||||
{
|
||||
BLI_assert(!glsl_comp_patch_.empty());
|
||||
return glsl_comp_patch_.c_str();
|
||||
}
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
||||
@@ -208,7 +208,10 @@ class VKDevice : public NonCopyable {
|
||||
VKWorkarounds workarounds_;
|
||||
VKExtensions extensions_;
|
||||
|
||||
std::string glsl_patch_;
|
||||
std::string glsl_vert_patch_;
|
||||
std::string glsl_geom_patch_;
|
||||
std::string glsl_frag_patch_;
|
||||
std::string glsl_comp_patch_;
|
||||
Vector<VKThreadData *> thread_data_;
|
||||
|
||||
public:
|
||||
@@ -361,7 +364,10 @@ class VKDevice : public NonCopyable {
|
||||
return extensions_;
|
||||
}
|
||||
|
||||
const char *glsl_patch_get() const;
|
||||
const char *glsl_vertex_patch_get() const;
|
||||
const char *glsl_geometry_patch_get() const;
|
||||
const char *glsl_fragment_patch_get() const;
|
||||
const char *glsl_compute_patch_get() const;
|
||||
void init_glsl_patch();
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
|
||||
@@ -533,15 +533,29 @@ void VKShader::build_shader_module(MutableSpan<StringRefNull> sources,
|
||||
shaderc_shader_kind stage,
|
||||
VKShaderModule &r_shader_module)
|
||||
{
|
||||
BLI_assert_msg(ELEM(stage,
|
||||
shaderc_vertex_shader,
|
||||
shaderc_geometry_shader,
|
||||
shaderc_fragment_shader,
|
||||
shaderc_compute_shader),
|
||||
"Only forced ShaderC shader kinds are supported.");
|
||||
r_shader_module.is_ready = false;
|
||||
const VKDevice &device = VKBackend::get().device;
|
||||
sources[SOURCES_INDEX_VERSION] = device.glsl_patch_get();
|
||||
const char *source_patch = nullptr;
|
||||
|
||||
switch (stage) {
|
||||
case shaderc_vertex_shader:
|
||||
source_patch = device.glsl_vertex_patch_get();
|
||||
break;
|
||||
case shaderc_geometry_shader:
|
||||
source_patch = device.glsl_geometry_patch_get();
|
||||
break;
|
||||
case shaderc_fragment_shader:
|
||||
source_patch = device.glsl_fragment_patch_get();
|
||||
break;
|
||||
case shaderc_compute_shader:
|
||||
source_patch = device.glsl_compute_patch_get();
|
||||
break;
|
||||
default:
|
||||
BLI_assert_msg(0, "Only forced ShaderC shader kinds are supported.");
|
||||
break;
|
||||
}
|
||||
|
||||
sources[SOURCES_INDEX_VERSION] = source_patch;
|
||||
r_shader_module.combined_sources = combine_sources(sources);
|
||||
if (!use_batch_compilation_) {
|
||||
VKShaderCompiler::compile_module(*this, stage, r_shader_module);
|
||||
|
||||
Reference in New Issue
Block a user