Fix: Tonemap node has a wrong luminance scale

The Tonemap node has a wrong luminance scale. This is because the parallel reduction shader for the logarithmic sum had a wrong identity value. In particular, its identity was set to 0.0, but since its initialization macro computed the log, the zero became a rather large negative value. To fix this, the general structure of the parallel reduction shader was changed such that the identity is used as is and is not passed to the INITIALIZE or LOAD macros. This simplifies the implementation and even avoids the extra texel fetches at the boundary.
2024-01-22 22:03:05 +02:00
parent 089c389b5c
commit 02fc4d6481
2 changed files with 33 additions and 24 deletions
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_parallel_reduction.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_parallel_reduction.glsl
@@ -54,25 +54,33 @@ shared TYPE reduction_data[reduction_size];

 void main()
 {
-  /* Load the data from the texture, while returning IDENTITY for out of bound coordinates. The
-   * developer is expected to define the IDENTITY macro to be a vec4 that does not affect the
-   * output of the reduction. For instance, sum reductions have an identity of vec4(0.0), while
-   * max value reductions have an identity of vec4(FLT_MIN). */
-  vec4 value = texture_load(input_tx, ivec2(gl_GlobalInvocationID.xy), IDENTITY);
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

-  /* Initialize the shared array given the previously loaded value. This step can be different
-   * depending on whether this is the initial reduction pass or a latter one. Indeed, the input
-   * texture for the initial reduction is the source texture itself, while the input texture to a
-   * latter reduction pass is an intermediate texture after one or more reductions have happened.
-   * This is significant because the data being reduced might be computed from the original data
-   * and different from it, for instance, when summing the luminance of an image, the original data
-   * is a vec4 color, while the reduced data is a float luminance value. So for the initial
-   * reduction pass, the luminance will be computed from the color, reduced, then stored into an
-   * intermediate float texture. On the other hand, for latter reduction passes, the luminance will
-   * be loaded directly and reduced without extra processing. So the developer is expected to
-   * define the INITIALIZE and LOAD macros to be expressions that derive the needed value from the
-   * loaded value for the initial reduction pass and latter ones respectively. */
-  reduction_data[gl_LocalInvocationIndex] = is_initial_reduction ? INITIALIZE(value) : LOAD(value);
+  /* Initialize the shared array for out of bound invocations using the IDENTITY value. The
+   * developer is expected to define the IDENTITY macro to be a value of type TYPE that does not
+   * affect the output of the reduction. For instance, sum reductions have an identity of 0.0,
+   * while max value reductions have an identity of FLT_MIN. */
+  if (any(lessThan(texel, ivec2(0))) || any(greaterThanEqual(texel, texture_size(input_tx)))) {
+    reduction_data[gl_LocalInvocationIndex] = IDENTITY;
+  }
+  else {
+    vec4 value = texture_load_unbound(input_tx, texel);
+
+    /* Initialize the shared array given the previously loaded value. This step can be different
+     * depending on whether this is the initial reduction pass or a latter one. Indeed, the input
+     * texture for the initial reduction is the source texture itself, while the input texture to a
+     * latter reduction pass is an intermediate texture after one or more reductions have happened.
+     * This is significant because the data being reduced might be computed from the original data
+     * and different from it, for instance, when summing the luminance of an image, the original
+     * data is a vec4 color, while the reduced data is a float luminance value. So for the initial
+     * reduction pass, the luminance will be computed from the color, reduced, then stored into an
+     * intermediate float texture. On the other hand, for latter reduction passes, the luminance
+     * will be loaded directly and reduced without extra processing. So the developer is expected
+     * to define the INITIALIZE and LOAD macros to be expressions that derive the needed value from
+     * the loaded value for the initial reduction pass and latter ones respectively. */
+    reduction_data[gl_LocalInvocationIndex] = is_initial_reduction ? INITIALIZE(value) :
+                                                                     LOAD(value);
+  }

  /* Reduce the reduction data by half on every iteration until only one element remains. See the
   * above figure for an intuitive understanding of the stride value. */
--- a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_parallel_reduction_info.hh
+++ b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_parallel_reduction_info.hh
@@ -16,13 +16,13 @@ GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_shared)

 GPU_SHADER_CREATE_INFO(compositor_sum_shared)
    .additional_info("compositor_parallel_reduction_shared")
-    .define("IDENTITY", "vec4(0.0)")
    .define("REDUCE(lhs, rhs)", "lhs + rhs");

 GPU_SHADER_CREATE_INFO(compositor_sum_float_shared)
    .additional_info("compositor_sum_shared")
    .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
    .define("TYPE", "float")
+    .define("IDENTITY", "0.0")
    .define("LOAD(value)", "value.x");

 GPU_SHADER_CREATE_INFO(compositor_sum_red)
@@ -56,6 +56,7 @@ GPU_SHADER_CREATE_INFO(compositor_sum_color)
    .additional_info("compositor_sum_shared")
    .image(0, GPU_RGBA32F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
    .define("TYPE", "vec4")
+    .define("IDENTITY", "vec4(0.0)")
    .define("INITIALIZE(value)", "value")
    .define("LOAD(value)", "value")
    .do_static_compilation(true);
@@ -69,7 +70,7 @@ GPU_SHADER_CREATE_INFO(compositor_sum_squared_difference_float_shared)
    .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
    .push_constant(Type::FLOAT, "subtrahend")
    .define("TYPE", "float")
-    .define("IDENTITY", "vec4(subtrahend)")
+    .define("IDENTITY", "0.0")
    .define("LOAD(value)", "value.x")
    .define("REDUCE(lhs, rhs)", "lhs + rhs");

@@ -104,7 +105,7 @@ GPU_SHADER_CREATE_INFO(compositor_maximum_luminance)
    .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
    .push_constant(Type::VEC3, "luminance_coefficients")
    .define("TYPE", "float")
-    .define("IDENTITY", "vec4(FLT_MIN)")
+    .define("IDENTITY", "FLT_MIN")
    .define("INITIALIZE(value)", "dot(value.rgb, luminance_coefficients)")
    .define("LOAD(value)", "value.x")
    .define("REDUCE(lhs, rhs)", "max(lhs, rhs)")
@@ -116,7 +117,7 @@ GPU_SHADER_CREATE_INFO(compositor_maximum_float_in_range)
    .push_constant(Type::FLOAT, "lower_bound")
    .push_constant(Type::FLOAT, "upper_bound")
    .define("TYPE", "float")
-    .define("IDENTITY", "vec4(lower_bound)")
+    .define("IDENTITY", "lower_bound")
    .define("INITIALIZE(v)", "((v.x <= upper_bound) && (v.x >= lower_bound)) ? v.x : lower_bound")
    .define("LOAD(value)", "value.x")
    .define("REDUCE(lhs, rhs)", "((rhs > lhs) && (rhs <= upper_bound)) ? rhs : lhs")
@@ -132,7 +133,7 @@ GPU_SHADER_CREATE_INFO(compositor_minimum_luminance)
    .image(0, GPU_R32F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
    .push_constant(Type::VEC3, "luminance_coefficients")
    .define("TYPE", "float")
-    .define("IDENTITY", "vec4(FLT_MAX)")
+    .define("IDENTITY", "FLT_MAX")
    .define("INITIALIZE(value)", "dot(value.rgb, luminance_coefficients)")
    .define("LOAD(value)", "value.x")
    .define("REDUCE(lhs, rhs)", "min(lhs, rhs)")
@@ -144,7 +145,7 @@ GPU_SHADER_CREATE_INFO(compositor_minimum_float_in_range)
    .push_constant(Type::FLOAT, "lower_bound")
    .push_constant(Type::FLOAT, "upper_bound")
    .define("TYPE", "float")
-    .define("IDENTITY", "vec4(upper_bound)")
+    .define("IDENTITY", "upper_bound")
    .define("INITIALIZE(v)", "((v.x <= upper_bound) && (v.x >= lower_bound)) ? v.x : upper_bound")
    .define("LOAD(value)", "value.x")
    .define("REDUCE(lhs, rhs)", "((rhs < lhs) && (rhs >= lower_bound)) ? rhs : lhs")