EEVEE Next: Add imageStoreFast/imageLoadFast ops to Film Shader

Add fast image writing and reading variants for film shader passes.
These variants do not perform bounds checking on the texel coordinates
and should only be used in cases where the texel being read or written is
guaranteed to be within the image bounds. This eliminates additional
branching and simplifies shader logic.

Authored by Apple: Michael Parkin-White
Pull Request: https://projects.blender.org/blender/blender/pulls/121114
This commit is contained in:
Jason Fielder
2024-07-21 17:13:07 +02:00
committed by Clément Foucault
parent db286bbb7f
commit 9059d4f020
5 changed files with 31 additions and 22 deletions

View File

@@ -33,7 +33,7 @@ void cryptomatte_clear_samples(FilmSample dst)
{
int layer_len = imageSize(cryptomatte_img).z;
for (int i = 0; i < layer_len; i++) {
imageStore(cryptomatte_img, ivec3(dst.texel, i), vec4(0.0));
imageStoreFast(cryptomatte_img, ivec3(dst.texel, i), vec4(0.0));
/* Ensure stores are visible to later reads. */
imageFence(cryptomatte_img);
}
@@ -70,7 +70,7 @@ void cryptomatte_store_film_sample(FilmSample dst,
else {
continue;
}
imageStore(cryptomatte_img, img_co, sample_pair);
imageStoreFast(cryptomatte_img, img_co, sample_pair);
break;
}
/* Ensure stores are visible to later reads. */

View File

@@ -11,5 +11,9 @@ void main()
vec4 out_color;
float out_depth;
if (any(greaterThanEqual(texel_film, uniform_buf.film.extent))) {
return;
}
film_process_data(texel_film, out_color, out_depth);
}

View File

@@ -13,7 +13,7 @@ void cryptomatte_load_samples(ivec2 texel, int layer, out vec2 samples[CRYPTOMAT
/* Read all samples from the cryptomatte layer. */
for (int p = 0; p < pass_len; p++) {
vec4 pass_sample = imageLoad(cryptomatte_img, ivec3(texel, p + layer_id));
vec4 pass_sample = imageLoadFast(cryptomatte_img, ivec3(texel, p + layer_id));
samples[p * 2] = pass_sample.xy;
samples[p * 2 + 1] = pass_sample.zw;
}
@@ -59,7 +59,7 @@ void cryptomatte_store_samples(ivec2 texel, int layer, vec2 samples[CRYPTOMATTE_
vec4 pass_sample;
pass_sample.xy = samples[p * 2];
pass_sample.zw = samples[p * 2 + 1];
imageStore(cryptomatte_img, ivec3(texel, p + layer_id), pass_sample);
imageStoreFast(cryptomatte_img, ivec3(texel, p + layer_id), pass_sample);
}
/* Ensure stores are visible to later reads. */
imageFence(cryptomatte_img);
@@ -68,12 +68,17 @@ void cryptomatte_store_samples(ivec2 texel, int layer, vec2 samples[CRYPTOMATTE_
void main()
{
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
if (any(greaterThanEqual(texel, uniform_buf.film.extent))) {
return;
}
for (int layer = 0; layer < cryptomatte_layer_len; layer++) {
vec2 samples[CRYPTOMATTE_LEVELS_MAX];
cryptomatte_load_samples(texel, layer, samples);
cryptomatte_sort_samples(samples);
/* Repeat texture coordinates as the weight can be optimized to a small portion of the film. */
float weight = imageLoad(
float weight = imageLoadFast(
weight_img,
ivec3(texel % imageSize(weight_img).xy, FILM_WEIGHT_LAYER_ACCUMULATION))
.x;

View File

@@ -10,21 +10,21 @@ void main()
float out_depth;
if (uniform_buf.film.display_only) {
out_depth = imageLoad(depth_img, texel_film).r;
out_depth = imageLoadFast(depth_img, texel_film).r;
if (display_id == -1) {
out_color = texelFetch(in_combined_tx, texel_film, 0);
}
else if (uniform_buf.film.display_storage_type == PASS_STORAGE_VALUE) {
out_color.rgb = imageLoad(value_accum_img, ivec3(texel_film, display_id)).rrr;
out_color.rgb = imageLoadFast(value_accum_img, ivec3(texel_film, display_id)).rrr;
out_color.a = 1.0;
}
else if (uniform_buf.film.display_storage_type == PASS_STORAGE_COLOR) {
out_color = imageLoad(color_accum_img, ivec3(texel_film, display_id));
out_color = imageLoadFast(color_accum_img, ivec3(texel_film, display_id));
}
else /* PASS_STORAGE_CRYPTOMATTE */ {
out_color = cryptomatte_false_color(
imageLoad(cryptomatte_img, ivec3(texel_film, display_id)).r);
imageLoadFast(cryptomatte_img, ivec3(texel_film, display_id)).r);
}
}
else {

View File

@@ -207,7 +207,7 @@ float film_distance_load(ivec2 texel)
if (!uniform_buf.film.use_history || use_reprojection) {
return 0.0;
}
return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE)).x;
return imageLoadFast(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE)).x;
}
float film_weight_load(ivec2 texel)
@@ -218,7 +218,7 @@ float film_weight_load(ivec2 texel)
if (!uniform_buf.film.use_history || use_reprojection) {
return 0.0;
}
return imageLoad(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION)).x;
return imageLoadFast(in_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION)).x;
}
/* Returns motion in pixel space to retrieve the pixel history. */
@@ -511,7 +511,7 @@ void film_store_combined(
if (display_id == -1) {
display = color;
}
imageStore(out_combined_img, dst.texel, color);
imageStoreFast(out_combined_img, dst.texel, color);
}
void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 display)
@@ -520,7 +520,7 @@ void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 displa
return;
}
vec4 data_film = imageLoad(color_accum_img, ivec3(dst.texel, pass_id));
vec4 data_film = imageLoadFast(color_accum_img, ivec3(dst.texel, pass_id));
color = (data_film * dst.weight + color) * dst.weight_sum_inv;
@@ -538,7 +538,7 @@ void film_store_color(FilmSample dst, int pass_id, vec4 color, inout vec4 displa
if (display_id == pass_id) {
display = color;
}
imageStore(color_accum_img, ivec3(dst.texel, pass_id), color);
imageStoreFast(color_accum_img, ivec3(dst.texel, pass_id), color);
}
void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 display)
@@ -547,7 +547,7 @@ void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 displ
return;
}
float data_film = imageLoad(value_accum_img, ivec3(dst.texel, pass_id)).x;
float data_film = imageLoadFast(value_accum_img, ivec3(dst.texel, pass_id)).x;
value = (data_film * dst.weight + value) * dst.weight_sum_inv;
@@ -559,7 +559,7 @@ void film_store_value(FilmSample dst, int pass_id, float value, inout vec4 displ
if (display_id == pass_id) {
display = vec4(value, value, value, 1.0);
}
imageStore(value_accum_img, ivec3(dst.texel, pass_id), vec4(value));
imageStoreFast(value_accum_img, ivec3(dst.texel, pass_id), vec4(value));
}
/* Nearest sample variant. Always stores the data. */
@@ -572,7 +572,7 @@ void film_store_data(ivec2 texel_film, int pass_id, vec4 data_sample, inout vec4
if (display_id == pass_id) {
display = data_sample;
}
imageStore(color_accum_img, ivec3(texel_film, pass_id), data_sample);
imageStoreFast(color_accum_img, ivec3(texel_film, pass_id), data_sample);
}
void film_store_depth(ivec2 texel_film, float value, out float out_depth)
@@ -583,17 +583,17 @@ void film_store_depth(ivec2 texel_film, float value, out float out_depth)
out_depth = film_depth_convert_to_scene(value);
imageStore(depth_img, texel_film, vec4(out_depth));
imageStoreFast(depth_img, texel_film, vec4(out_depth));
}
/* Write `value`, broadcast to a vec4, into layer FILM_WEIGHT_LAYER_DISTANCE of `out_weight_img`
 * at `texel`. */
void film_store_distance(ivec2 texel, float value)
{
/* NOTE(review): the two stores below look like the removed/added sides of the diff hunk (plain
 * store, then its unchecked Fast replacement) -- confirm only the Fast call remains. Per the
 * commit message the Fast variant skips range checking, so `texel` must already be in range. */
imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE), vec4(value));
imageStoreFast(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_DISTANCE), vec4(value));
}
/* Write `value`, broadcast to a vec4, into layer FILM_WEIGHT_LAYER_ACCUMULATION of
 * `out_weight_img` at `texel`. */
void film_store_weight(ivec2 texel, float value)
{
/* NOTE(review): the two stores below look like the removed/added sides of the diff hunk (plain
 * store, then its unchecked Fast replacement) -- confirm only the Fast call remains. Per the
 * commit message the Fast variant skips range checking, so `texel` must already be in range. */
imageStore(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION), vec4(value));
imageStoreFast(out_weight_img, ivec3(texel, FILM_WEIGHT_LAYER_ACCUMULATION), vec4(value));
}
float film_display_depth_amend(ivec2 texel, float depth)
@@ -673,9 +673,9 @@ void film_process_data(ivec2 texel_film, out vec4 out_color, out float out_depth
film_store_distance(texel_film, film_sample.weight);
}
else {
out_depth = imageLoad(depth_img, texel_film).r;
out_depth = imageLoadFast(depth_img, texel_film).r;
if (display_id != -1 && display_id == normal_id) {
out_color = imageLoad(color_accum_img, ivec3(texel_film, display_id));
out_color = imageLoadFast(color_accum_img, ivec3(texel_film, display_id));
}
}
}