Refactor: Move Blur Gamma Correction to its own algorithm

Move the Gamma Correction pass of blur nodes into its own algorithm to avoid code duplication and to optimize pixel access: before this change, gamma correction was applied to every pixel loaded inside the filter window, so each input pixel was corrected many times. Gives a 15% improvement. Pull Request: https://projects.blender.org/blender/blender/pulls/131480
2024-12-06 08:14:26 +01:00
parent 393c75cc39
commit 1732467bc6
21 changed files with 300 additions and 285 deletions
--- a/source/blender/compositor/realtime_compositor/CMakeLists.txt
+++ b/source/blender/compositor/realtime_compositor/CMakeLists.txt
@@ -76,6 +76,7 @@ set(SRC
  algorithms/intern/compute_preview.cc
  algorithms/intern/deriche_gaussian_blur.cc
  algorithms/intern/extract_alpha.cc
+  algorithms/intern/gamma_correct.cc
  algorithms/intern/jump_flooding.cc
  algorithms/intern/morphological_blur.cc
  algorithms/intern/morphological_distance.cc
@@ -93,6 +94,7 @@ set(SRC
  algorithms/COM_algorithm_compute_preview.hh
  algorithms/COM_algorithm_deriche_gaussian_blur.hh
  algorithms/COM_algorithm_extract_alpha.hh
+  algorithms/COM_algorithm_gamma_correct.hh
  algorithms/COM_algorithm_jump_flooding.hh
  algorithms/COM_algorithm_morphological_blur.hh
  algorithms/COM_algorithm_morphological_distance.hh
@@ -181,6 +183,7 @@ set(GLSL_SRC
  shaders/compositor_ellipse_mask.glsl
  shaders/compositor_filter.glsl
  shaders/compositor_flip.glsl
+  shaders/compositor_gamma_correct.glsl
  shaders/compositor_glare_bloom_downsample.glsl
  shaders/compositor_glare_bloom_upsample.glsl
  shaders/compositor_glare_ghost_accumulate.glsl
@@ -254,7 +257,6 @@ set(GLSL_SRC
  shaders/compositor_z_combine_simple.glsl

  shaders/library/gpu_shader_compositor_alpha_over.glsl
-  shaders/library/gpu_shader_compositor_blur_common.glsl
  shaders/library/gpu_shader_compositor_bright_contrast.glsl
  shaders/library/gpu_shader_compositor_channel_matte.glsl
  shaders/library/gpu_shader_compositor_chroma_matte.glsl
--- a/source/blender/compositor/realtime_compositor/algorithms/COM_algorithm_gamma_correct.hh
+++ b/source/blender/compositor/realtime_compositor/algorithms/COM_algorithm_gamma_correct.hh
@@ -0,0 +1,22 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "COM_context.hh"
+#include "COM_result.hh"
+
+namespace blender::realtime_compositor {
+
+/* Gamma corrects the input in its straight alpha form and writes the result to the output. The
+ * gamma factor is assumed to be 2. The output will be allocated internally and is thus expected
+ * not to be previously allocated. */
+void gamma_correct(Context &context, const Result &input, Result &output);
+
+/* Gamma uncorrects the input in its straight alpha form and writes the result to the output. The
+ * gamma factor is assumed to be 2. The output will be allocated internally and is thus expected
+ * not to be previously allocated. */
+void gamma_uncorrect(Context &context, const Result &input, Result &output);
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/algorithms/COM_algorithm_symmetric_separable_blur.hh
+++ b/source/blender/compositor/realtime_compositor/algorithms/COM_algorithm_symmetric_separable_blur.hh
@@ -17,15 +17,12 @@ namespace blender::realtime_compositor {
 * and filter type using SymmetricSeparableBlurWeights. The output is written to the given output
 * result, which will be allocated internally and is thus expected not to be previously allocated.
 * If extend_bounds is true, the output will have an extra radius amount of pixels on the boundary
- * of the image, where blurring can take place assuming a fully transparent out of bound values. If
- * gamma_correct is true, the input will be gamma corrected before blurring and then uncorrected
- * after blurring, using a gamma coefficient of 2. */
+ * of the image, where blurring can take place assuming fully transparent out of bound values. */
 void symmetric_separable_blur(Context &context,
                              const Result &input,
                              Result &output,
                              const float2 &radius,
                              const int filter_type = R_FILTER_GAUSS,
-                              const bool extend_bounds = false,
-                              const bool gamma_correct = false);
+                              const bool extend_bounds = false);

 }  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/algorithms/intern/gamma_correct.cc
+++ b/source/blender/compositor/realtime_compositor/algorithms/intern/gamma_correct.cc
@@ -0,0 +1,93 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "BLI_math_base.hh"
+
+#include "GPU_shader.hh"
+
+#include "COM_context.hh"
+#include "COM_result.hh"
+#include "COM_utilities.hh"
+
+#include "COM_algorithm_gamma_correct.hh"
+
+namespace blender::realtime_compositor {
+
+static void gamma_correct_gpu(Context &context, const Result &input, Result &output)
+{
+  GPUShader *shader = context.get_shader("compositor_gamma_correct");
+  GPU_shader_bind(shader);
+
+  input.bind_as_texture(shader, "input_tx");
+
+  output.allocate_texture(input.domain());
+  output.bind_as_image(shader, "output_img");
+
+  compute_dispatch_threads_at_least(shader, input.domain().size);
+
+  GPU_shader_unbind();
+  input.unbind_as_texture();
+  output.unbind_as_image();
+}
+
+static void gamma_correct_cpu(const Result &input, Result &output)
+{
+  output.allocate_texture(input.domain());
+  parallel_for(input.domain().size, [&](const int2 texel) {
+    float4 color = input.load_pixel<float4>(texel);
+    float alpha = color.w > 0.0f ? color.w : 1.0f;
+    float3 corrected_color = math::square(math::max(color.xyz() / alpha, float3(0.0f))) * alpha;
+    output.store_pixel(texel, float4(corrected_color, color.w));
+  });
+}
+
+void gamma_correct(Context &context, const Result &input, Result &output)
+{
+  if (context.use_gpu()) {
+    gamma_correct_gpu(context, input, output);
+  }
+  else {
+    gamma_correct_cpu(input, output);
+  }
+}
+
+static void gamma_uncorrect_gpu(Context &context, const Result &input, Result &output)
+{
+  GPUShader *shader = context.get_shader("compositor_gamma_uncorrect");
+  GPU_shader_bind(shader);
+
+  input.bind_as_texture(shader, "input_tx");
+
+  output.allocate_texture(input.domain());
+  output.bind_as_image(shader, "output_img");
+
+  compute_dispatch_threads_at_least(shader, input.domain().size);
+
+  GPU_shader_unbind();
+  input.unbind_as_texture();
+  output.unbind_as_image();
+}
+
+static void gamma_uncorrect_cpu(const Result &input, Result &output)
+{
+  output.allocate_texture(input.domain());
+  parallel_for(input.domain().size, [&](const int2 texel) {
+    float4 color = input.load_pixel<float4>(texel);
+    float alpha = color.w > 0.0f ? color.w : 1.0f;
+    float3 corrected_color = math::sqrt(math::max(color.xyz() / alpha, float3(0.0f))) * alpha;
+    output.store_pixel(texel, float4(corrected_color, color.w));
+  });
+}
+
+void gamma_uncorrect(Context &context, const Result &input, Result &output)
+{
+  if (context.use_gpu()) {
+    gamma_uncorrect_gpu(context, input, output);
+  }
+  else {
+    gamma_uncorrect_cpu(input, output);
+  }
+}
+
+}  // namespace blender::realtime_compositor
--- a/source/blender/compositor/realtime_compositor/algorithms/intern/symmetric_separable_blur.cc
+++ b/source/blender/compositor/realtime_compositor/algorithms/intern/symmetric_separable_blur.cc
@@ -20,39 +20,17 @@

 namespace blender::realtime_compositor {

-/* Preprocess the input of the blur filter by squaring it in its alpha straight form, assuming
- * the given color is alpha pre-multiplied. */
-static float4 gamma_correct_blur_input(const float4 &color)
-{
-  float alpha = color.w > 0.0f ? color.w : 1.0f;
-  float3 corrected_color = math::square(math::max(color.xyz() / alpha, float3(0.0f))) * alpha;
-  return float4(corrected_color, color.w);
-}
-
-/* Postprocess the output of the blur filter by taking its square root it in its alpha straight
- * form, assuming the given color is alpha pre-multiplied. This essential undoes the processing
- * done by the gamma_correct_blur_input function. */
-static float4 gamma_uncorrect_blur_output(const float4 &color)
-{
-  float alpha = color.w > 0.0f ? color.w : 1.0f;
-  float3 uncorrected_color = math::sqrt(math::max(color.xyz() / alpha, float3(0.0f))) * alpha;
-  return float4(uncorrected_color, color.w);
-}
-
 static void blur_pass(const Result &input,
                      const Result &weights,
                      Result &output,
-                      const bool extend_bounds,
-                      const bool gamma_correct_input,
-                      const bool gamma_uncorrect_output)
+                      const bool extend_bounds)
 {
-  /* Loads the input color of the pixel at the given texel. If gamma correction is enabled, the
-   * color is gamma corrected. If bounds are extended, then the input is treated as padded by a
-   * blur size amount of pixels of zero color, and the given texel is assumed to be in the space of
-   * the image after padding. So we offset the texel by the blur radius amount and fallback to a
-   * zero color if it is out of bounds. For instance, if the input is padded by 5 pixels to the
-   * left of the image, the first 5 pixels should be out of bounds and thus zero, hence the
-   * introduced offset. */
+  /* Loads the input color of the pixel at the given texel. If bounds are extended, then the input
+   * is treated as padded by a blur size amount of pixels of zero color, and the given texel is
+   * assumed to be in the space of the image after padding. So we offset the texel by the blur
+   * radius amount and fallback to a zero color if it is out of bounds. For instance, if the input
+   * is padded by 5 pixels to the left of the image, the first 5 pixels should be out of bounds and
+   * thus zero, hence the introduced offset. */
  auto load_input = [&](const int2 texel) {
    float4 color;
    if (extend_bounds) {
@@ -65,10 +43,6 @@ static void blur_pass(const Result &input,
      color = input.load_pixel_extended_generic_type(texel);
    }

-    if (gamma_correct_input) {
-      color = gamma_correct_blur_input(color);
-    }
-
    return color;
  };

@@ -92,10 +66,6 @@ static void blur_pass(const Result &input,
      accumulated_color += load_input(texel + int2(-i, 0)) * weight;
    }

-    if (gamma_uncorrect_output) {
-      accumulated_color = gamma_uncorrect_blur_output(accumulated_color);
-    }
-
    /* Write the color using the transposed texel. See the horizontal_pass method for more
     * information on the rational behind this. */
    output.store_pixel_generic_type(int2(texel.y, texel.x), accumulated_color);
@@ -128,15 +98,12 @@ static Result horizontal_pass_gpu(Context &context,
                                  const Result &input,
                                  const float radius,
                                  const int filter_type,
-                                  const bool extend_bounds,
-                                  const bool gamma_correct)
+                                  const bool extend_bounds)
 {
  GPUShader *shader = context.get_shader(get_blur_shader(input.type()));
  GPU_shader_bind(shader);

  GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);
-  GPU_shader_uniform_1b(shader, "gamma_correct_input", gamma_correct);
-  GPU_shader_uniform_1b(shader, "gamma_uncorrect_output", false);

  input.bind_as_texture(shader, "input_tx");

@@ -177,8 +144,7 @@ static Result horizontal_pass_cpu(Context &context,
                                  const Result &input,
                                  const float radius,
                                  const int filter_type,
-                                  const bool extend_bounds,
-                                  const bool gamma_correct)
+                                  const bool extend_bounds)
 {
  const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
      context, filter_type, radius);
@@ -201,7 +167,7 @@ static Result horizontal_pass_cpu(Context &context,
  Result output = context.create_result(input.type());
  output.allocate_texture(transposed_domain);

-  blur_pass(input, weights, output, extend_bounds, gamma_correct, false);
+  blur_pass(input, weights, output, extend_bounds);

  return output;
 }
@@ -210,13 +176,12 @@ static Result horizontal_pass(Context &context,
                              const Result &input,
                              const float radius,
                              const int filter_type,
-                              const bool extend_bounds,
-                              const bool gamma_correct)
+                              const bool extend_bounds)
 {
  if (context.use_gpu()) {
-    return horizontal_pass_gpu(context, input, radius, filter_type, extend_bounds, gamma_correct);
+    return horizontal_pass_gpu(context, input, radius, filter_type, extend_bounds);
  }
-  return horizontal_pass_cpu(context, input, radius, filter_type, extend_bounds, gamma_correct);
+  return horizontal_pass_cpu(context, input, radius, filter_type, extend_bounds);
 }

 static void vertical_pass_gpu(Context &context,
@@ -225,15 +190,12 @@ static void vertical_pass_gpu(Context &context,
                              Result &output,
                              const float2 &radius,
                              const int filter_type,
-                              const bool extend_bounds,
-                              const bool gamma_correct)
+                              const bool extend_bounds)
 {
  GPUShader *shader = context.get_shader(get_blur_shader(original_input.type()));
  GPU_shader_bind(shader);

  GPU_shader_uniform_1b(shader, "extend_bounds", extend_bounds);
-  GPU_shader_uniform_1b(shader, "gamma_correct_input", false);
-  GPU_shader_uniform_1b(shader, "gamma_uncorrect_output", gamma_correct);

  horizontal_pass_result.bind_as_texture(shader, "input_tx");

@@ -266,8 +228,7 @@ static void vertical_pass_cpu(Context &context,
                              Result &output,
                              const float2 &radius,
                              const int filter_type,
-                              const bool extend_bounds,
-                              const bool gamma_correct)
+                              const bool extend_bounds)
 {
  const Result &weights = context.cache_manager().symmetric_separable_blur_weights.get(
      context, filter_type, radius.y);
@@ -279,7 +240,7 @@ static void vertical_pass_cpu(Context &context,
  }
  output.allocate_texture(domain);

-  blur_pass(horizontal_pass_result, weights, output, extend_bounds, false, gamma_correct);
+  blur_pass(horizontal_pass_result, weights, output, extend_bounds);
 }

 static void vertical_pass(Context &context,
@@ -288,8 +249,7 @@ static void vertical_pass(Context &context,
                          Result &output,
                          const float2 &radius,
                          const int filter_type,
-                          const bool extend_bounds,
-                          const bool gamma_correct)
+                          const bool extend_bounds)
 {
  if (context.use_gpu()) {
    vertical_pass_gpu(context,
@@ -298,8 +258,7 @@ static void vertical_pass(Context &context,
                      output,
                      radius,
                      filter_type,
-                      extend_bounds,
-                      gamma_correct);
+                      extend_bounds);
  }
  else {
    vertical_pass_cpu(context,
@@ -308,8 +267,7 @@ static void vertical_pass(Context &context,
                      output,
                      radius,
                      filter_type,
-                      extend_bounds,
-                      gamma_correct);
+                      extend_bounds);
  }
 }

@@ -318,20 +276,13 @@ void symmetric_separable_blur(Context &context,
                              Result &output,
                              const float2 &radius,
                              const int filter_type,
-                              const bool extend_bounds,
-                              const bool gamma_correct)
+                              const bool extend_bounds)
 {
  Result horizontal_pass_result = horizontal_pass(
-      context, input, radius.x, filter_type, extend_bounds, gamma_correct);
+      context, input, radius.x, filter_type, extend_bounds);

-  vertical_pass(context,
-                input,
-                horizontal_pass_result,
-                output,
-                radius,
-                filter_type,
-                extend_bounds,
-                gamma_correct);
+  vertical_pass(
+      context, input, horizontal_pass_result, output, radius, filter_type, extend_bounds);

  horizontal_pass_result.release();
 }
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_defocus_blur.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_defocus_blur.glsl
@@ -2,7 +2,6 @@
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

-#include "gpu_shader_compositor_blur_common.glsl"
 #include "gpu_shader_compositor_texture_utilities.glsl"
 #include "gpu_shader_math_vector_lib.glsl"

@@ -54,10 +53,6 @@ void main()
      vec4 weight = load_weight(ivec2(x, y), radius);
      vec4 input_color = texture_load(input_tx, texel + ivec2(x, y));

-      if (gamma_correct) {
-        input_color = gamma_correct_blur_input(input_color);
-      }
-
      accumulated_color += input_color * weight;
      accumulated_weight += weight;
    }
@@ -65,9 +60,5 @@ void main()

  accumulated_color = safe_divide(accumulated_color, accumulated_weight);

-  if (gamma_correct) {
-    accumulated_color = gamma_uncorrect_blur_output(accumulated_color);
-  }
-
  imageStore(output_img, texel, accumulated_color);
 }
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_gamma_correct.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_gamma_correct.glsl
@@ -0,0 +1,14 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_compositor_texture_utilities.glsl"
+
+void main()
+{
+  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
+  vec4 color = texture_load(input_tx, texel);
+  float alpha = color.a > 0.0 ? color.a : 1.0;
+  vec3 corrected_color = FUNCTION(max(color.rgb / alpha, vec3(0.0))) * alpha;
+  imageStore(output_img, texel, vec4(corrected_color, color.a));
+}
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_symmetric_blur.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_symmetric_blur.glsl
@@ -2,15 +2,14 @@
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

-#include "gpu_shader_compositor_blur_common.glsl"
 #include "gpu_shader_compositor_texture_utilities.glsl"

-/* Loads the input color of the pixel at the given texel. If gamma correction is enabled, the color
- * is gamma corrected. If bounds are extended, then the input is treated as padded by a blur size
- * amount of pixels of zero color, and the given texel is assumed to be in the space of the image
- * after padding. So we offset the texel by the blur radius amount and fallback to a zero color if
- * it is out of bounds. For instance, if the input is padded by 5 pixels to the left of the image,
- * the first 5 pixels should be out of bounds and thus zero, hence the introduced offset. */
+/* Loads the input color of the pixel at the given texel. If bounds are extended, then the input is
+ * treated as padded by a blur size amount of pixels of zero color, and the given texel is assumed
+ * to be in the space of the image after padding. So we offset the texel by the blur radius amount
+ * and fallback to a zero color if it is out of bounds. For instance, if the input is padded by 5
+ * pixels to the left of the image, the first 5 pixels should be out of bounds and thus zero, hence
+ * the introduced offset. */
 vec4 load_input(ivec2 texel)
 {
  vec4 color;
@@ -24,10 +23,6 @@ vec4 load_input(ivec2 texel)
    color = texture_load(input_tx, texel);
  }

-  if (gamma_correct) {
-    color = gamma_correct_blur_input(color);
-  }
-
  return color;
 }

@@ -77,9 +72,5 @@ void main()
    }
  }

-  if (gamma_correct) {
-    accumulated_color = gamma_uncorrect_blur_output(accumulated_color);
-  }
-
  imageStore(output_img, texel, accumulated_color);
 }
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_symmetric_blur_variable_size.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_symmetric_blur_variable_size.glsl
@@ -2,16 +2,15 @@
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

-#include "gpu_shader_compositor_blur_common.glsl"
 #include "gpu_shader_compositor_texture_utilities.glsl"
 #include "gpu_shader_math_vector_lib.glsl"

-/* Loads the input color of the pixel at the given texel. If gamma correction is enabled, the color
- * is gamma corrected. If bounds are extended, then the input is treated as padded by a blur size
- * amount of pixels of zero color, and the given texel is assumed to be in the space of the image
- * after padding. So we offset the texel by the blur radius amount and fallback to a zero color if
- * it is out of bounds. For instance, if the input is padded by 5 pixels to the left of the image,
- * the first 5 pixels should be out of bounds and thus zero, hence the introduced offset. */
+/* Loads the input color of the pixel at the given texel. If bounds are extended, then the input is
+ * treated as padded by a blur size amount of pixels of zero color, and the given texel is assumed
+ * to be in the space of the image after padding. So we offset the texel by the blur radius amount
+ * and fallback to a zero color if it is out of bounds. For instance, if the input is padded by 5
+ * pixels to the left of the image, the first 5 pixels should be out of bounds and thus zero, hence
+ * the introduced offset. */
 vec4 load_input(ivec2 texel)
 {
  vec4 color;
@@ -25,15 +24,11 @@ vec4 load_input(ivec2 texel)
    color = texture_load(input_tx, texel);
  }

-  if (gamma_correct) {
-    color = gamma_correct_blur_input(color);
-  }
-
  return color;
 }

-/* Similar to load_input but loads the size instead, has no gamma correction, and clamps to borders
- * instead of returning zero for out of bound access. See load_input for more information. */
+/* Similar to load_input but loads the size instead and clamps to borders instead of returning zero
+ * for out of bound access. See load_input for more information. */
 float load_size(ivec2 texel)
 {
  ivec2 blur_radius = texture_size(weights_tx) - 1;
@@ -104,9 +99,5 @@ void main()

  accumulated_color = safe_divide(accumulated_color, accumulated_weight);

-  if (gamma_correct) {
-    accumulated_color = gamma_uncorrect_blur_output(accumulated_color);
-  }
-
  imageStore(output_img, texel, accumulated_color);
 }
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_symmetric_separable_blur.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_symmetric_separable_blur.glsl
@@ -2,7 +2,6 @@
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

-#include "gpu_shader_compositor_blur_common.glsl"
 #include "gpu_shader_compositor_texture_utilities.glsl"

 vec4 load_input(ivec2 texel)
@@ -20,10 +19,6 @@ vec4 load_input(ivec2 texel)
    color = texture_load(input_tx, texel);
  }

-  if (gamma_correct_input) {
-    color = gamma_correct_blur_input(color);
-  }
-
  return color;
 }

@@ -47,10 +42,6 @@ void main()
    accumulated_color += load_input(texel + ivec2(-i, 0)) * weight;
  }

-  if (gamma_uncorrect_output) {
-    accumulated_color = gamma_uncorrect_blur_output(accumulated_color);
-  }
-
  /* Write the color using the transposed texel. See the execute_separable_blur_horizontal_pass
   * method for more information on the rational behind this. */
  imageStore(output_img, texel.yx, accumulated_color);
--- a/source/blender/compositor/realtime_compositor/shaders/compositor_symmetric_separable_blur_variable_size.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/compositor_symmetric_separable_blur_variable_size.glsl
@@ -2,7 +2,6 @@
 *
 * SPDX-License-Identifier: GPL-2.0-or-later */

-#include "gpu_shader_compositor_blur_common.glsl"
 #include "gpu_shader_compositor_texture_utilities.glsl"

 void main()
--- a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_defocus_info.hh
+++ b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_defocus_info.hh
@@ -29,7 +29,6 @@ GPU_SHADER_CREATE_END()

 GPU_SHADER_CREATE_INFO(compositor_defocus_blur)
 LOCAL_GROUP_SIZE(16, 16)
-PUSH_CONSTANT(BOOL, gamma_correct)
 PUSH_CONSTANT(INT, search_radius)
 SAMPLER(0, FLOAT_2D, input_tx)
 SAMPLER(1, FLOAT_2D, weights_tx)
--- a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_gamma_correct_info.hh
+++ b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_gamma_correct_info.hh
@@ -0,0 +1,24 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include "gpu_shader_create_info.hh"
+
+GPU_SHADER_CREATE_INFO(compositor_gamma_correct_shared)
+LOCAL_GROUP_SIZE(16, 16)
+SAMPLER(0, FLOAT_2D, input_tx)
+IMAGE(0, GPU_RGBA16F, WRITE, FLOAT_2D, output_img)
+COMPUTE_SOURCE("compositor_gamma_correct.glsl")
+GPU_SHADER_CREATE_END()
+
+GPU_SHADER_CREATE_INFO(compositor_gamma_correct)
+ADDITIONAL_INFO(compositor_gamma_correct_shared)
+DEFINE_VALUE("FUNCTION(x)", "(x * x)")
+DO_STATIC_COMPILATION()
+GPU_SHADER_CREATE_END()
+
+GPU_SHADER_CREATE_INFO(compositor_gamma_uncorrect)
+ADDITIONAL_INFO(compositor_gamma_correct_shared)
+DEFINE_VALUE("FUNCTION(x)", "sqrt(x)")
+DO_STATIC_COMPILATION()
+GPU_SHADER_CREATE_END()
--- a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_symmetric_blur_info.hh
+++ b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_symmetric_blur_info.hh
@@ -7,7 +7,6 @@
 GPU_SHADER_CREATE_INFO(compositor_symmetric_blur)
 LOCAL_GROUP_SIZE(16, 16)
 PUSH_CONSTANT(BOOL, extend_bounds)
-PUSH_CONSTANT(BOOL, gamma_correct)
 SAMPLER(0, FLOAT_2D, input_tx)
 SAMPLER(1, FLOAT_2D, weights_tx)
 IMAGE(0, GPU_RGBA16F, WRITE, FLOAT_2D, output_img)
--- a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_symmetric_blur_variable_size_info.hh
+++ b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_symmetric_blur_variable_size_info.hh
@@ -7,7 +7,6 @@
 GPU_SHADER_CREATE_INFO(compositor_symmetric_blur_variable_size)
 LOCAL_GROUP_SIZE(16, 16)
 PUSH_CONSTANT(BOOL, extend_bounds)
-PUSH_CONSTANT(BOOL, gamma_correct)
 SAMPLER(0, FLOAT_2D, input_tx)
 SAMPLER(1, FLOAT_2D, weights_tx)
 SAMPLER(2, FLOAT_2D, size_tx)
--- a/source/blender/compositor/realtime_compositor/shaders/infos/compositor_symmetric_separable_blur_info.hh
+++ b/source/blender/compositor/realtime_compositor/shaders/infos/compositor_symmetric_separable_blur_info.hh
@@ -7,8 +7,6 @@
 GPU_SHADER_CREATE_INFO(compositor_symmetric_separable_blur_shared)
 LOCAL_GROUP_SIZE(16, 16)
 PUSH_CONSTANT(BOOL, extend_bounds)
-PUSH_CONSTANT(BOOL, gamma_correct_input)
-PUSH_CONSTANT(BOOL, gamma_uncorrect_output)
 SAMPLER(0, FLOAT_2D, input_tx)
 SAMPLER(1, FLOAT_2D, weights_tx)
 COMPUTE_SOURCE("compositor_symmetric_separable_blur.glsl")
--- a/source/blender/compositor/realtime_compositor/shaders/library/gpu_shader_compositor_blur_common.glsl
+++ b/source/blender/compositor/realtime_compositor/shaders/library/gpu_shader_compositor_blur_common.glsl
@@ -1,24 +0,0 @@
-/* SPDX-FileCopyrightText: 2022 Blender Authors
- *
- * SPDX-License-Identifier: GPL-2.0-or-later */
-
-#include "gpu_shader_math_base_lib.glsl"
-
-/* Preprocess the input of the blur filter by squaring it in its alpha straight form, assuming the
- * given color is alpha pre-multiplied. */
-vec4 gamma_correct_blur_input(vec4 color)
-{
-  float alpha = color.a > 0.0 ? color.a : 1.0;
-  vec3 corrected_color = square(max(color.rgb / alpha, vec3(0.0))) * alpha;
-  return vec4(corrected_color, color.a);
-}
-
-/* Postprocess the output of the blur filter by taking its square root it in its alpha straight
- * form, assuming the given color is alpha pre-multiplied. This essential undoes the processing
- * done by the gamma_correct_blur_input function. */
-vec4 gamma_uncorrect_blur_output(vec4 color)
-{
-  float alpha = color.a > 0.0 ? color.a : 1.0;
-  vec3 uncorrected_color = sqrt(max(color.rgb / alpha, vec3(0.0))) * alpha;
-  return vec4(uncorrected_color, color.a);
-}
--- a/source/blender/gpu/intern/gpu_shader_create_info_list.hh
+++ b/source/blender/gpu/intern/gpu_shader_create_info_list.hh
@@ -70,6 +70,7 @@
 #include "compositor_ellipse_mask_info.hh"
 #include "compositor_filter_info.hh"
 #include "compositor_flip_info.hh"
+#include "compositor_gamma_correct_info.hh"
 #include "compositor_glare_info.hh"
 #include "compositor_id_mask_info.hh"
 #include "compositor_image_crop_info.hh"
--- a/source/blender/nodes/composite/nodes/node_composite_blur.cc
+++ b/source/blender/nodes/composite/nodes/node_composite_blur.cc
@@ -19,6 +19,7 @@
 #include "GPU_shader.hh"
 #include "GPU_texture.hh"

+#include "COM_algorithm_gamma_correct.hh"
 #include "COM_algorithm_recursive_gaussian_blur.hh"
 #include "COM_algorithm_symmetric_separable_blur.hh"
 #include "COM_node_operation.hh"
@@ -102,52 +103,69 @@ class BlurOperation : public NodeOperation {

  void execute() override
  {
+    Result &input = get_input("Image");
+    Result &output = get_result("Image");
    if (is_identity()) {
-      get_input("Image").pass_through(get_result("Image"));
+      input.pass_through(output);
      return;
    }

+    Result *blur_input = &input;
+    Result *blur_output = &output;
+
+    /* Apply gamma correction if needed. */
+    Result gamma_blur_output = this->context().create_result(ResultType::Color);
+    Result gamma_corrected_input = this->context().create_result(ResultType::Color);
+    if (this->should_apply_gamma_correction()) {
+      gamma_correct(this->context(), input, gamma_corrected_input);
+      blur_input = &gamma_corrected_input;
+      blur_output = &gamma_blur_output;
+    }
+
    if (node_storage(bnode()).filtertype == R_FILTER_FAST_GAUSS) {
-      recursive_gaussian_blur(
-          context(), get_input("Image"), get_result("Image"), compute_blur_radius());
+      recursive_gaussian_blur(context(), *blur_input, *blur_output, compute_blur_radius());
    }
    else if (use_variable_size()) {
-      execute_variable_size();
+      execute_variable_size(*blur_input, *blur_output);
    }
    else if (use_separable_filter()) {
      symmetric_separable_blur(context(),
-                               get_input("Image"),
-                               get_result("Image"),
+                               *blur_input,
+                               *blur_output,
                               compute_blur_radius(),
                               node_storage(bnode()).filtertype,
-                               get_extend_bounds(),
-                               node_storage(bnode()).gamma);
+                               get_extend_bounds());
    }
    else {
-      execute_constant_size();
+      execute_constant_size(*blur_input, *blur_output);
+    }
+
+    /* Undo gamma correction. */
+    if (this->should_apply_gamma_correction()) {
+      gamma_corrected_input.release();
+      gamma_uncorrect(this->context(), gamma_blur_output, output);
+      gamma_blur_output.release();
    }
  }

-  void execute_constant_size()
+  void execute_constant_size(const Result &input, Result &output)
  {
    if (this->context().use_gpu()) {
-      this->execute_constant_size_gpu();
+      this->execute_constant_size_gpu(input, output);
    }
    else {
-      this->execute_constant_size_cpu();
+      this->execute_constant_size_cpu(input, output);
    }
  }

-  void execute_constant_size_gpu()
+  void execute_constant_size_gpu(const Result &input, Result &output)
  {
    GPUShader *shader = context().get_shader("compositor_symmetric_blur");
    GPU_shader_bind(shader);

    GPU_shader_uniform_1b(shader, "extend_bounds", get_extend_bounds());
-    GPU_shader_uniform_1b(shader, "gamma_correct", node_storage(bnode()).gamma);

-    const Result &input_image = get_input("Image");
-    input_image.bind_as_texture(shader, "input_tx");
+    input.bind_as_texture(shader, "input_tx");

    const float2 blur_radius = compute_blur_radius();

@@ -161,19 +179,18 @@ class BlurOperation : public NodeOperation {
      domain.size += int2(math::ceil(blur_radius)) * 2;
    }

-    Result &output_image = get_result("Image");
-    output_image.allocate_texture(domain);
-    output_image.bind_as_image(shader, "output_img");
+    output.allocate_texture(domain);
+    output.bind_as_image(shader, "output_img");

    compute_dispatch_threads_at_least(shader, domain.size);

    GPU_shader_unbind();
-    output_image.unbind_as_image();
-    input_image.unbind_as_texture();
+    output.unbind_as_image();
+    input.unbind_as_texture();
    weights.unbind_as_texture();
  }

-  void execute_constant_size_cpu()
+  void execute_constant_size_cpu(const Result &input, Result &output)
  {
    const float2 blur_radius = this->compute_blur_radius();
    const Result &weights = this->context().cache_manager().symmetric_blur_weights.get(
@@ -186,13 +203,10 @@ class BlurOperation : public NodeOperation {
      domain.size += int2(math::ceil(blur_radius)) * 2;
    }

-    Result &output = get_result("Image");
    output.allocate_texture(domain);

-    const Result &input = this->get_input("Image");
-    const bool gamma_correct = node_storage(this->bnode()).gamma;
    auto load_input = [&](const int2 texel) {
-      return this->load_input(input, weights, texel, extend_bounds, gamma_correct);
+      return this->load_input(input, weights, texel, extend_bounds);
    };

    parallel_for(domain.size, [&](const int2 texel) {
@@ -238,34 +252,28 @@ class BlurOperation : public NodeOperation {
        }
      }

-      if (gamma_correct) {
-        accumulated_color = this->gamma_uncorrect_blur_output(accumulated_color);
-      }
-
      output.store_pixel(texel, accumulated_color);
    });
  }

-  void execute_variable_size()
+  void execute_variable_size(const Result &input, Result &output)
  {
    if (this->context().use_gpu()) {
-      this->execute_variable_size_gpu();
+      this->execute_variable_size_gpu(input, output);
    }
    else {
-      this->execute_variable_size_cpu();
+      this->execute_variable_size_cpu(input, output);
    }
  }

-  void execute_variable_size_gpu()
+  void execute_variable_size_gpu(const Result &input, Result &output)
  {
    GPUShader *shader = context().get_shader("compositor_symmetric_blur_variable_size");
    GPU_shader_bind(shader);

    GPU_shader_uniform_1b(shader, "extend_bounds", get_extend_bounds());
-    GPU_shader_uniform_1b(shader, "gamma_correct", node_storage(bnode()).gamma);

-    const Result &input_image = get_input("Image");
-    input_image.bind_as_texture(shader, "input_tx");
+    input.bind_as_texture(shader, "input_tx");

    const float2 blur_radius = compute_blur_radius();

@@ -282,20 +290,19 @@ class BlurOperation : public NodeOperation {
      domain.size += int2(math::ceil(blur_radius)) * 2;
    }

-    Result &output_image = get_result("Image");
-    output_image.allocate_texture(domain);
-    output_image.bind_as_image(shader, "output_img");
+    output.allocate_texture(domain);
+    output.bind_as_image(shader, "output_img");

    compute_dispatch_threads_at_least(shader, domain.size);

    GPU_shader_unbind();
-    output_image.unbind_as_image();
-    input_image.unbind_as_texture();
+    output.unbind_as_image();
+    input.unbind_as_texture();
    weights.unbind_as_texture();
    input_size.unbind_as_texture();
  }

-  void execute_variable_size_cpu()
+  void execute_variable_size_cpu(const Result &input, Result &output)
  {
    const float2 blur_radius = this->compute_blur_radius();
    const Result &weights = this->context().cache_manager().symmetric_blur_weights.get(
@@ -308,19 +315,15 @@ class BlurOperation : public NodeOperation {
      domain.size += int2(math::ceil(blur_radius)) * 2;
    }

-    Result &output = get_result("Image");
    output.allocate_texture(domain);

-    const Result &input = this->get_input("Image");
-    const bool gamma_correct = node_storage(this->bnode()).gamma;
    auto load_input = [&](const int2 texel) {
-      return this->load_input(input, weights, texel, extend_bounds, gamma_correct);
+      return this->load_input(input, weights, texel, extend_bounds);
    };

    const Result &size = get_input("Size");
-    /* Similar to load_input but loads the size instead, has no gamma correction, and clamps to
-     * borders instead of returning zero for out of bound access. See load_input for more
-     * information. */
+    /* Similar to load_input but loads the size instead and clamps to borders instead of returning
+     * zero for out-of-bounds access. See load_input for more information. */
    auto load_size = [&](const int2 texel) {
      int2 blur_radius = weights.domain().size - 1;
      int2 offset = extend_bounds ? blur_radius : int2(0);
@@ -388,26 +391,20 @@ class BlurOperation : public NodeOperation {

      accumulated_color = math::safe_divide(accumulated_color, accumulated_weight);

-      if (gamma_correct) {
-        accumulated_color = this->gamma_uncorrect_blur_output(accumulated_color);
-      }
-
      output.store_pixel(texel, accumulated_color);
    });
  }

-  /* Loads the input color of the pixel at the given texel. If gamma correction is enabled, the
-   * color is gamma corrected. If bounds are extended, then the input is treated as padded by a
-   * blur size amount of pixels of zero color, and the given texel is assumed to be in the space of
-   * the image after padding. So we offset the texel by the blur radius amount and fallback to a
-   * zero color if it is out of bounds. For instance, if the input is padded by 5 pixels to the
-   * left of the image, the first 5 pixels should be out of bounds and thus zero, hence the
-   * introduced offset. */
+  /* Loads the input color of the pixel at the given texel. If bounds are extended, then the input
+   * is treated as padded by a blur size amount of pixels of zero color, and the given texel is
+   * assumed to be in the space of the image after padding. So we offset the texel by the blur
+   * radius amount and fall back to a zero color if it is out of bounds. For instance, if the input
+   * is padded by 5 pixels to the left of the image, the first 5 pixels should be out of bounds and
+   * thus zero, hence the introduced offset. */
  float4 load_input(const Result &input,
                    const Result &weights,
                    const int2 texel,
-                    const bool extend_bounds,
-                    const bool gamma_correct)
+                    const bool extend_bounds)
  {
    float4 color;
    if (extend_bounds) {
@@ -420,32 +417,9 @@ class BlurOperation : public NodeOperation {
      color = input.load_pixel_extended<float4>(texel);
    }

-    if (gamma_correct) {
-      color = this->gamma_correct_blur_input(color);
-    }
-
    return color;
  }

-  /* Preprocess the input of the blur filter by squaring it in its alpha straight form, assuming
-   * the given color is alpha pre-multiplied. */
-  float4 gamma_correct_blur_input(const float4 &color)
-  {
-    float alpha = color.w > 0.0f ? color.w : 1.0f;
-    float3 corrected_color = math::square(math::max(color.xyz() / alpha, float3(0.0f))) * alpha;
-    return float4(corrected_color, color.w);
-  }
-
-  /* Postprocess the output of the blur filter by taking its square root it in its alpha straight
-   * form, assuming the given color is alpha pre-multiplied. This essential undoes the processing
-   * done by the gamma_correct_blur_input function. */
-  float4 gamma_uncorrect_blur_output(const float4 &color)
-  {
-    float alpha = color.w > 0.0f ? color.w : 1.0f;
-    float3 uncorrected_color = math::sqrt(math::max(color.xyz() / alpha, float3(0.0f))) * alpha;
-    return float4(uncorrected_color, color.w);
-  }
-
  float2 compute_blur_radius()
  {
    const float size = math::clamp(get_input("Size").get_float_value_default(1.0f), 0.0f, 1.0f);
@@ -520,6 +494,11 @@ class BlurOperation : public NodeOperation {
    return float2(node_storage(bnode()).percentx, node_storage(bnode()).percenty) / 100.0f;
  }

+  bool should_apply_gamma_correction()
+  {
+    return node_storage(this->bnode()).gamma;
+  }
+
  bool get_extend_bounds()
  {
    return bnode().custom1 & CMP_NODEFLAG_BLUR_EXTEND_BOUNDS;
--- a/source/blender/nodes/composite/nodes/node_composite_defocus.cc
+++ b/source/blender/nodes/composite/nodes/node_composite_defocus.cc
@@ -22,6 +22,7 @@
 #include "UI_interface.hh"
 #include "UI_resources.hh"

+#include "COM_algorithm_gamma_correct.hh"
 #include "COM_algorithm_morphological_blur.hh"
 #include "COM_bokeh_kernel.hh"
 #include "COM_node_operation.hh"
@@ -121,25 +122,48 @@ class DefocusOperation : public NodeOperation {
    const Result &bokeh_kernel = context().cache_manager().bokeh_kernels.get(
        context(), kernel_size, sides, rotation, roundness, 0.0f, 0.0f);

+    Result *defocus_input = &input;
+    Result *defocus_output = &output;
+
+    /* Apply gamma correction if needed. */
+    Result gamma_defocus_output = this->context().create_result(ResultType::Color);
+    Result gamma_corrected_input = this->context().create_result(ResultType::Color);
+    if (this->should_apply_gamma_correction()) {
+      gamma_correct(this->context(), input, gamma_corrected_input);
+      defocus_input = &gamma_corrected_input;
+      defocus_output = &gamma_defocus_output;
+    }
+
    if (this->context().use_gpu()) {
-      this->execute_gpu(radius, bokeh_kernel, maximum_defocus_radius);
+      this->execute_gpu(
+          *defocus_input, radius, bokeh_kernel, *defocus_output, maximum_defocus_radius);
    }
    else {
-      this->execute_cpu(radius, bokeh_kernel, maximum_defocus_radius);
+      this->execute_cpu(
+          *defocus_input, radius, bokeh_kernel, *defocus_output, maximum_defocus_radius);
    }

    radius.release();
+
+    /* Undo gamma correction. */
+    if (this->should_apply_gamma_correction()) {
+      gamma_corrected_input.release();
+      gamma_uncorrect(this->context(), gamma_defocus_output, output);
+      gamma_defocus_output.release();
+    }
  }

-  void execute_gpu(const Result &radius, const Result &bokeh_kernel, const int search_radius)
+  void execute_gpu(const Result &input,
+                   const Result &radius,
+                   const Result &bokeh_kernel,
+                   Result &output,
+                   const int search_radius)
  {
    GPUShader *shader = context().get_shader("compositor_defocus_blur");
    GPU_shader_bind(shader);

-    GPU_shader_uniform_1b(shader, "gamma_correct", node_storage(bnode()).gamco);
    GPU_shader_uniform_1i(shader, "search_radius", search_radius);

-    Result &input = get_input("Image");
    input.bind_as_texture(shader, "input_tx");

    radius.bind_as_texture(shader, "radius_tx");
@@ -148,7 +172,6 @@ class DefocusOperation : public NodeOperation {
    bokeh_kernel.bind_as_texture(shader, "weights_tx");

    const Domain domain = compute_domain();
-    Result &output = get_result("Image");
    output.allocate_texture(domain);
    output.bind_as_image(shader, "output_img");

@@ -161,14 +184,13 @@ class DefocusOperation : public NodeOperation {
    output.unbind_as_image();
  }

-  void execute_cpu(const Result &radius, const Result &bokeh_kernel, const int search_radius)
+  void execute_cpu(const Result &input,
+                   const Result &radius,
+                   const Result &bokeh_kernel,
+                   Result &output,
+                   const int search_radius)
  {
-    const bool gamma_correct = node_storage(bnode()).gamco;
-
-    Result &input = get_input("Image");
-
    const Domain domain = compute_domain();
-    Result &output = get_result("Image");
    output.allocate_texture(domain);

    /* Given the texel in the range [-radius, radius] in both axis, load the appropriate weight
@@ -218,10 +240,6 @@ class DefocusOperation : public NodeOperation {
          float4 weight = load_weight(int2(x, y), radius);
          float4 input_color = input.load_pixel_extended<float4>(texel + int2(x, y));

-          if (gamma_correct) {
-            input_color = gamma_correct_blur_input(input_color);
-          }
-
          accumulated_color += input_color * weight;
          accumulated_weight += weight;
        }
@@ -229,33 +247,10 @@ class DefocusOperation : public NodeOperation {

      accumulated_color = math::safe_divide(accumulated_color, accumulated_weight);

-      if (gamma_correct) {
-        accumulated_color = gamma_uncorrect_blur_output(accumulated_color);
-      }
-
      output.store_pixel(texel, accumulated_color);
    });
  }

-  /* Preprocess the input of the blur filter by squaring it in its alpha straight form, assuming
-   * the given color is alpha pre-multiplied. */
-  float4 gamma_correct_blur_input(const float4 &color)
-  {
-    float alpha = color.w > 0.0f ? color.w : 1.0f;
-    float3 corrected_color = math::square(math::max(color.xyz() / alpha, float3(0.0f))) * alpha;
-    return float4(corrected_color, color.w);
-  }
-
-  /* Postprocess the output of the blur filter by taking its square root it in its alpha straight
-   * form, assuming the given color is alpha pre-multiplied. This essential undoes the processing
-   * done by the gamma_correct_blur_input function. */
-  float4 gamma_uncorrect_blur_output(const float4 &color)
-  {
-    float alpha = color.w > 0.0f ? color.w : 1.0f;
-    float3 uncorrected_color = math::sqrt(math::max(color.xyz() / alpha, float3(0.0f))) * alpha;
-    return float4(uncorrected_color, color.w);
-  }
-
  Result compute_defocus_radius()
  {
    if (node_storage(bnode()).no_zbuf) {
@@ -525,6 +520,11 @@ class DefocusOperation : public NodeOperation {
    return math::max(1e-3f, node_storage(bnode()).fstop);
  }

+  bool should_apply_gamma_correction()
+  {
+    return node_storage(this->bnode()).gamco;
+  }
+
  const Camera *get_camera()
  {
    const Object *camera_object = get_camera_object();
--- a/source/blender/nodes/composite/nodes/node_composite_glare.cc
+++ b/source/blender/nodes/composite/nodes/node_composite_glare.cc
@@ -1143,7 +1143,6 @@ class GlareOperation : public NodeOperation {
                             small_ghost_result,
                             float2(get_small_ghost_radius()),
                             R_FILTER_GAUSS,
-                             false,
                             false);

    Result big_ghost_result = context().create_result(ResultType::Color);
@@ -1152,7 +1151,6 @@ class GlareOperation : public NodeOperation {
                             big_ghost_result,
                             float2(get_big_ghost_radius()),
                             R_FILTER_GAUSS,
-                             false,
                             false);

    Result base_ghost_result = context().create_result(ResultType::Color);