diff --git a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl index dc6ae961cae..17a9c18a71d 100644 --- a/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl +++ b/source/blender/draw/engines/eevee_next/shaders/eevee_hiz_update_comp.glsl @@ -53,16 +53,16 @@ void main() #endif if (update_mip_0) { - imageStore(out_mip_0, src_px + ivec2(0, 1), samp.xxxx); - imageStore(out_mip_0, src_px + ivec2(1, 1), samp.yyyy); - imageStore(out_mip_0, src_px + ivec2(1, 0), samp.zzzz); - imageStore(out_mip_0, src_px + ivec2(0, 0), samp.wwww); + imageStoreFast(out_mip_0, src_px + ivec2(0, 1), samp.xxxx); + imageStoreFast(out_mip_0, src_px + ivec2(1, 1), samp.yyyy); + imageStoreFast(out_mip_0, src_px + ivec2(1, 0), samp.zzzz); + imageStoreFast(out_mip_0, src_px + ivec2(0, 0), samp.wwww); } /* Level 1. (No load) */ float max_depth = reduce_max(samp); ivec2 dst_px = ivec2(kernel_origin + local_px); - imageStore(out_mip_1, dst_px, vec4(max_depth)); + imageStoreFast(out_mip_1, dst_px, vec4(max_depth)); store_local_depth(local_px, max_depth); /* Level 2-5. */ @@ -75,7 +75,7 @@ void main() if (active_thread) { \ max_depth = reduce_max(load_local_depths(local_px)); \ dst_px = ivec2((kernel_origin >> mask_shift) + local_px); \ - imageStore(out_mip__, dst_px, vec4(max_depth)); \ + imageStoreFast(out_mip__, dst_px, vec4(max_depth)); \ } \ barrier(); /* Wait for previous reads to finish. */ \ if (active_thread) { \ @@ -105,14 +105,14 @@ void main() kernel_origin = ivec2(gl_WorkGroupSize.xy) * ivec2(x, y); src_px = ivec2(kernel_origin + local_px) * 2; vec4 samp; - samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x; - samp.y = imageLoad(out_mip_5, min(src_px + ivec2(1, 1), image_border)).x; - samp.z = imageLoad(out_mip_5, min(src_px + ivec2(1, 0), image_border)).x; - samp.w = imageLoad(out_mip_5, min(src_px + ivec2(0, 0), image_border)).x; + samp.x = imageLoadFast(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x; + samp.y = imageLoadFast(out_mip_5, min(src_px + ivec2(1, 1), image_border)).x; + samp.z = imageLoadFast(out_mip_5, min(src_px + ivec2(1, 0), image_border)).x; + samp.w = imageLoadFast(out_mip_5, min(src_px + ivec2(0, 0), image_border)).x; /* Level 6. */ float max_depth = reduce_max(samp); ivec2 dst_px = ivec2(kernel_origin + local_px); - imageStore(out_mip_6, dst_px, vec4(max_depth)); + imageStoreFast(out_mip_6, dst_px, vec4(max_depth)); store_local_depth(local_px, max_depth); mask_shift = 1; diff --git a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl index 0a974e4d882..b20491ff632 100644 --- a/source/blender/gpu/shaders/metal/mtl_shader_defines.msl +++ b/source/blender/gpu/shaders/metal/mtl_shader_defines.msl @@ -359,6 +359,7 @@ struct SStruct { #define texelFetchOffset(__tex, __texel, __lod, __offset) \ _texelFetch_internal(__tex, __texel, __lod, __offset) #define imageLoad(__image, __coord) _texelFetch_internal(__image, __coord, 0) +#define imageLoadFast(__image, __coord) _texelFetch_internal_fast(__image, __coord, 0) #define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv) #define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias))) #define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod))) @@ -497,6 +498,14 @@ inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_1d +inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d tex, + T texel, + uint lod = 0) +{ + return tex.texture->read(uint(texel)); +} + template inline vec _texelFetch_internal( const thread _mtl_combined_image_sampler_buffer tex, T texel, uint lod = 0) @@ -510,6 +519,13 @@ inline vec _texelFetch_internal( } } +template +inline vec _texelFetch_internal_fast( + const thread _mtl_combined_image_sampler_buffer tex, T texel, uint lod = 0) +{ + return tex.texture->read(uint(texel)); +} + template inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_1d tex, T texel, @@ -526,6 +542,16 @@ inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_1d +inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d tex, + T texel, + uint lod, + T offset) +{ + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel + offset), 0); +} + template inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array tex, vec texel, @@ -546,6 +572,16 @@ inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_1d_arra } } +template +inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d_array tex, + vec texel, + uint lod, + vec offset = vec(0, 0)) +{ + /* LODs not supported for 1d textures. This must be zero. */ + return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0); +} + template inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_2d tex, vec texel, @@ -565,6 +601,15 @@ inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_2d +inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_2d tex, + vec texel, + uint lod, + vec offset = vec(0)) +{ + return tex.texture->read(uint2(texel + offset), lod); +} + template inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array tex, vec texel, @@ -584,6 +629,15 @@ inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_2d_arra } } +template +inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_2d_array tex, + vec texel, + uint lod, + vec offset = vec(0)) +{ + return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod); +} + template inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_3d tex, vec texel, @@ -604,6 +658,15 @@ inline vec _texelFetch_internal(thread _mtl_combined_image_sampler_3d +inline vec _texelFetch_internal_fast(thread _mtl_combined_image_sampler_3d tex, + vec texel, + uint lod, + vec offset = vec(0)) +{ + return tex.texture->read(uint3(texel + offset), lod); +} + template inline _msl_return_float _texelFetch_internal( thread _mtl_combined_image_sampler_depth_2d tex, @@ -626,6 +689,17 @@ inline _msl_return_float _texelFetch_internal( } } +template +inline _msl_return_float _texelFetch_internal_fast( + thread _mtl_combined_image_sampler_depth_2d tex, + vec texel, + uint lod, + vec offset = vec(0)) +{ + _msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)}; + return fl; +} + template inline vec _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array tex, vec texel, diff --git a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl index 85874fc3fa4..6f87010c3fb 100644 --- a/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl +++ b/source/blender/gpu/shaders/opengl/glsl_shader_defines.glsl @@ -12,6 +12,7 @@ /* Fast store variant macro. In GLSL this is the same as imageStore, but assumes no bounds * checking. */ #define imageStoreFast imageStore +#define imageLoadFast imageLoad /* Texture format tokens -- Type explicitness required by other Graphics APIs. */ #define depth2D sampler2D