EEVEE Next: Optimize HiZ with fast image load store routines
Authored by Apple: Michael Parkin-White. Pull Request: https://projects.blender.org/blender/blender/pulls/116953
This commit is contained in:
committed by
Clément Foucault
parent
d16d2bbd3a
commit
190567f941
@@ -53,16 +53,16 @@ void main()
|
||||
#endif
|
||||
|
||||
if (update_mip_0) {
|
||||
imageStore(out_mip_0, src_px + ivec2(0, 1), samp.xxxx);
|
||||
imageStore(out_mip_0, src_px + ivec2(1, 1), samp.yyyy);
|
||||
imageStore(out_mip_0, src_px + ivec2(1, 0), samp.zzzz);
|
||||
imageStore(out_mip_0, src_px + ivec2(0, 0), samp.wwww);
|
||||
imageStoreFast(out_mip_0, src_px + ivec2(0, 1), samp.xxxx);
|
||||
imageStoreFast(out_mip_0, src_px + ivec2(1, 1), samp.yyyy);
|
||||
imageStoreFast(out_mip_0, src_px + ivec2(1, 0), samp.zzzz);
|
||||
imageStoreFast(out_mip_0, src_px + ivec2(0, 0), samp.wwww);
|
||||
}
|
||||
|
||||
/* Level 1. (No load) */
|
||||
float max_depth = reduce_max(samp);
|
||||
ivec2 dst_px = ivec2(kernel_origin + local_px);
|
||||
imageStore(out_mip_1, dst_px, vec4(max_depth));
|
||||
imageStoreFast(out_mip_1, dst_px, vec4(max_depth));
|
||||
store_local_depth(local_px, max_depth);
|
||||
|
||||
/* Level 2-5. */
|
||||
@@ -75,7 +75,7 @@ void main()
|
||||
if (active_thread) { \
|
||||
max_depth = reduce_max(load_local_depths(local_px)); \
|
||||
dst_px = ivec2((kernel_origin >> mask_shift) + local_px); \
|
||||
imageStore(out_mip__, dst_px, vec4(max_depth)); \
|
||||
imageStoreFast(out_mip__, dst_px, vec4(max_depth)); \
|
||||
} \
|
||||
barrier(); /* Wait for previous reads to finish. */ \
|
||||
if (active_thread) { \
|
||||
@@ -105,14 +105,14 @@ void main()
|
||||
kernel_origin = ivec2(gl_WorkGroupSize.xy) * ivec2(x, y);
|
||||
src_px = ivec2(kernel_origin + local_px) * 2;
|
||||
vec4 samp;
|
||||
samp.x = imageLoad(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x;
|
||||
samp.y = imageLoad(out_mip_5, min(src_px + ivec2(1, 1), image_border)).x;
|
||||
samp.z = imageLoad(out_mip_5, min(src_px + ivec2(1, 0), image_border)).x;
|
||||
samp.w = imageLoad(out_mip_5, min(src_px + ivec2(0, 0), image_border)).x;
|
||||
samp.x = imageLoadFast(out_mip_5, min(src_px + ivec2(0, 1), image_border)).x;
|
||||
samp.y = imageLoadFast(out_mip_5, min(src_px + ivec2(1, 1), image_border)).x;
|
||||
samp.z = imageLoadFast(out_mip_5, min(src_px + ivec2(1, 0), image_border)).x;
|
||||
samp.w = imageLoadFast(out_mip_5, min(src_px + ivec2(0, 0), image_border)).x;
|
||||
/* Level 6. */
|
||||
float max_depth = reduce_max(samp);
|
||||
ivec2 dst_px = ivec2(kernel_origin + local_px);
|
||||
imageStore(out_mip_6, dst_px, vec4(max_depth));
|
||||
imageStoreFast(out_mip_6, dst_px, vec4(max_depth));
|
||||
store_local_depth(local_px, max_depth);
|
||||
|
||||
mask_shift = 1;
|
||||
|
||||
@@ -359,6 +359,7 @@ struct SStruct {
|
||||
#define texelFetchOffset(__tex, __texel, __lod, __offset) \
|
||||
_texelFetch_internal(__tex, __texel, __lod, __offset)
|
||||
#define imageLoad(__image, __coord) _texelFetch_internal(__image, __coord, 0)
|
||||
/* Fast variant of `imageLoad`: forwards to the `_texelFetch_internal_fast` overloads, always
 * passing LOD 0. */
#define imageLoadFast(__image, __coord) _texelFetch_internal_fast(__image, __coord, 0)
|
||||
#define texture2(__tex, __uv) _texture_internal_samp(__tex, __uv)
|
||||
#define texture3(__tex, __uv, _bias) _texture_internal_bias(__tex, __uv, bias(float(_bias)))
|
||||
#define textureLod(__tex, __uv, __lod) _texture_internal_level(__tex, __uv, level(float(__lod)))
|
||||
@@ -497,6 +498,14 @@ inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A
|
||||
}
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
|
||||
T texel,
|
||||
uint lod = 0)
|
||||
{
|
||||
return tex.texture->read(uint(texel));
|
||||
}
|
||||
|
||||
template<typename S, typename T>
|
||||
inline vec<S, 4> _texelFetch_internal(
|
||||
const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel, uint lod = 0)
|
||||
@@ -510,6 +519,13 @@ inline vec<S, 4> _texelFetch_internal(
|
||||
}
|
||||
}
|
||||
|
||||
template<typename S, typename T>
|
||||
inline vec<S, 4> _texelFetch_internal_fast(
|
||||
const thread _mtl_combined_image_sampler_buffer<S, access::read> tex, T texel, uint lod = 0)
|
||||
{
|
||||
return tex.texture->read(uint(texel));
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A> tex,
|
||||
T texel,
|
||||
@@ -526,6 +542,16 @@ inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d<S, A
|
||||
}
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d<S, A> tex,
|
||||
T texel,
|
||||
uint lod,
|
||||
T offset)
|
||||
{
|
||||
/* LODs not supported for 1d textures. This must be zero. */
|
||||
return tex.texture->read(uint(texel + offset), 0);
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
|
||||
vec<T, 2> texel,
|
||||
@@ -546,6 +572,16 @@ inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_1d_arra
|
||||
}
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_1d_array<S, A> tex,
|
||||
vec<T, 2> texel,
|
||||
uint lod,
|
||||
vec<T, 2> offset = vec<T, 2>(0, 0))
|
||||
{
|
||||
/* LODs not supported for 1d textures. This must be zero. */
|
||||
return tex.texture->read(uint(texel.x + offset.x), uint(texel.y + offset.y), 0);
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, A> tex,
|
||||
vec<T, 2> texel,
|
||||
@@ -565,6 +601,15 @@ inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d<S, A
|
||||
}
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_2d<S, A> tex,
|
||||
vec<T, 2> texel,
|
||||
uint lod,
|
||||
vec<T, 2> offset = vec<T, 2>(0))
|
||||
{
|
||||
return tex.texture->read(uint2(texel + offset), lod);
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
|
||||
vec<T, 3> texel,
|
||||
@@ -584,6 +629,15 @@ inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_2d_arra
|
||||
}
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
|
||||
vec<T, 3> texel,
|
||||
uint lod,
|
||||
vec<T, 3> offset = vec<T, 3>(0))
|
||||
{
|
||||
return tex.texture->read(uint2(texel.xy + offset.xy), uint(texel.z + offset.z), lod);
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, A> tex,
|
||||
vec<T, 3> texel,
|
||||
@@ -604,6 +658,15 @@ inline vec<S, 4> _texelFetch_internal(thread _mtl_combined_image_sampler_3d<S, A
|
||||
}
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texelFetch_internal_fast(thread _mtl_combined_image_sampler_3d<S, A> tex,
|
||||
vec<T, 3> texel,
|
||||
uint lod,
|
||||
vec<T, 3> offset = vec<T, 3>(0))
|
||||
{
|
||||
return tex.texture->read(uint3(texel + offset), lod);
|
||||
}
|
||||
|
||||
template<typename T, access A>
|
||||
inline _msl_return_float _texelFetch_internal(
|
||||
thread _mtl_combined_image_sampler_depth_2d<float, A> tex,
|
||||
@@ -626,6 +689,17 @@ inline _msl_return_float _texelFetch_internal(
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T, access A>
|
||||
inline _msl_return_float _texelFetch_internal_fast(
|
||||
thread _mtl_combined_image_sampler_depth_2d<float, A> tex,
|
||||
vec<T, 2> texel,
|
||||
uint lod,
|
||||
vec<T, 2> offset = vec<T, 2>(0))
|
||||
{
|
||||
_msl_return_float fl = {tex.texture->read(uint2(texel + offset), lod)};
|
||||
return fl;
|
||||
}
|
||||
|
||||
template<typename S, typename T, access A>
|
||||
inline vec<S, 4> _texture_internal_samp(thread _mtl_combined_image_sampler_2d_array<S, A> tex,
|
||||
vec<T, 3> texel,
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
/* Fast load/store variant macros. In GLSL these are the same as imageLoad/imageStore, but
|
||||
* assume no bounds checking. */
|
||||
#define imageStoreFast imageStore
|
||||
#define imageLoadFast imageLoad
|
||||
|
||||
/* Texture format tokens -- Type explicitness required by other Graphics APIs. */
|
||||
#define depth2D sampler2D
|
||||
|
||||
Reference in New Issue
Block a user