Fix: GPU Directional Blur node does not match CPU
The GPU Directional Blur node does not match the CPU. This is because the GPU accumulates the scale, while the CPU increments it. This patch increments the scale for the GPU to make them match.
This commit is contained in:
@@ -9,17 +9,38 @@ void main()
|
||||
ivec2 texel = ivec2(gl_GlobalInvocationID.xy);
|
||||
ivec2 input_size = texture_size(input_tx);
|
||||
|
||||
/* Add 0.5 to evaluate the input sampler at the center of the pixel. */
|
||||
vec2 coordinates = vec2(texel) + vec2(0.5);
|
||||
vec2 coordinates = (vec2(texel) + vec2(0.5)) / vec2(input_size);
|
||||
|
||||
/* For each iteration, accumulate the input at the normalized coordinates, hence the divide by
|
||||
* input size, then transform the coordinates for the next iteration. */
|
||||
float current_sin = 0.0;
|
||||
float current_cos = 1.0;
|
||||
float current_scale = 1.0;
|
||||
vec2 current_translation = vec2(0.0);
|
||||
|
||||
/* For each iteration, accumulate the input at the transformed coordinates, then increment the
|
||||
* transformations for the next iteration. */
|
||||
vec4 accumulated_color = vec4(0.0);
|
||||
for (int i = 0; i < iterations; i++) {
|
||||
accumulated_color += texture(input_tx, coordinates / vec2(input_size));
|
||||
coordinates = (mat3(inverse_transformation) * vec3(coordinates, 1.0)).xy;
|
||||
/* Transform the coordinates by first offsetting the origin, scaling, translating, rotating,
|
||||
* then finally restoring the origin. Notice that we do the inverse of each of the transforms,
|
||||
* since we are transforming the coordinates, not the image. */
|
||||
vec2 transformed_coordinates = coordinates;
|
||||
transformed_coordinates -= origin;
|
||||
transformed_coordinates /= current_scale;
|
||||
transformed_coordinates -= current_translation;
|
||||
transformed_coordinates *= mat2(current_cos, current_sin, -current_sin, current_cos);
|
||||
transformed_coordinates += origin;
|
||||
|
||||
accumulated_color += texture(input_tx, transformed_coordinates);
|
||||
|
||||
current_scale += scale;
|
||||
current_translation += translation;
|
||||
|
||||
/* Those are the sine and cosine addition identities. Used to avoid computing sine and cosine
|
||||
* at each iteration. */
|
||||
float new_sin = current_sin * rotation_cos + current_cos * rotation_sin;
|
||||
current_cos = current_cos * rotation_cos - current_sin * rotation_sin;
|
||||
current_sin = new_sin;
|
||||
}
|
||||
|
||||
/* Write the accumulated color divided by the number of iterations. */
|
||||
imageStore(output_img, texel, accumulated_color / iterations);
|
||||
}
|
||||
|
||||
@@ -7,7 +7,11 @@
|
||||
GPU_SHADER_CREATE_INFO(compositor_directional_blur)
|
||||
.local_group_size(16, 16)
|
||||
.push_constant(Type::INT, "iterations")
|
||||
.push_constant(Type::MAT4, "inverse_transformation")
|
||||
.push_constant(Type::VEC2, "origin")
|
||||
.push_constant(Type::VEC2, "translation")
|
||||
.push_constant(Type::FLOAT, "rotation_sin")
|
||||
.push_constant(Type::FLOAT, "rotation_cos")
|
||||
.push_constant(Type::FLOAT, "scale")
|
||||
.sampler(0, ImageType::FLOAT_2D, "input_tx")
|
||||
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
|
||||
.compute_source("compositor_directional_blur.glsl")
|
||||
|
||||
@@ -83,7 +83,11 @@ class DirectionalBlurOperation : public NodeOperation {
|
||||
* transformation. So add an extra iteration for the original image and put that into
|
||||
* consideration in the shader. */
|
||||
GPU_shader_uniform_1i(shader, "iterations", get_iterations() + 1);
|
||||
GPU_shader_uniform_mat3_as_mat4(shader, "inverse_transformation", get_transformation().ptr());
|
||||
GPU_shader_uniform_2fv(shader, "origin", get_origin());
|
||||
GPU_shader_uniform_2fv(shader, "translation", get_translation());
|
||||
GPU_shader_uniform_1f(shader, "rotation_sin", math::sin(get_rotation()));
|
||||
GPU_shader_uniform_1f(shader, "rotation_cos", math::cos(get_rotation()));
|
||||
GPU_shader_uniform_1f(shader, "scale", get_scale());
|
||||
|
||||
const Result &input_image = get_input("Image");
|
||||
GPU_texture_filter_mode(input_image.texture(), true);
|
||||
@@ -102,16 +106,18 @@ class DirectionalBlurOperation : public NodeOperation {
|
||||
input_image.unbind_as_texture();
|
||||
}
|
||||
|
||||
/* Get the amount of translation that will be applied on each iteration. The translation is in
|
||||
* the negative x direction rotated in the clock-wise direction, hence the negative sign for the
|
||||
* rotation and translation vector. */
|
||||
/* Get the amount of translation relative to the image size that will be applied on each
|
||||
* iteration. The translation is in the negative x direction rotated in the clock-wise direction,
|
||||
* hence the negative sign for the rotation and translation vector. */
|
||||
float2 get_translation()
|
||||
{
|
||||
const float diagonal_length = math::length(float2(get_input("Image").domain().size));
|
||||
const float2 input_size = float2(get_input("Image").domain().size);
|
||||
const float diagonal_length = math::length(input_size);
|
||||
const float translation_amount = diagonal_length * node_storage(bnode()).distance;
|
||||
const float2x2 rotation = math::from_rotation<float2x2>(
|
||||
math::AngleRadian(-node_storage(bnode()).angle));
|
||||
return rotation * float2(-translation_amount / get_iterations(), 0.0f);
|
||||
const float2 translation = rotation * float2(-translation_amount / get_iterations(), 0.0f);
|
||||
return translation / input_size;
|
||||
}
|
||||
|
||||
/* Get the amount of rotation that will be applied on each iteration. */
|
||||
@@ -122,27 +128,14 @@ class DirectionalBlurOperation : public NodeOperation {
|
||||
|
||||
/* Get the amount of scale that will be applied on each iteration. The scale is identity when the
|
||||
* user supplies 0, so we add 1. */
|
||||
float2 get_scale()
|
||||
float get_scale()
|
||||
{
|
||||
return float2(1.0f + node_storage(bnode()).zoom / get_iterations());
|
||||
return node_storage(bnode()).zoom / get_iterations();
|
||||
}
|
||||
|
||||
float2 get_origin()
|
||||
{
|
||||
const float2 center = float2(node_storage(bnode()).center_x, node_storage(bnode()).center_y);
|
||||
return float2(get_input("Image").domain().size) * center;
|
||||
}
|
||||
|
||||
float3x3 get_transformation()
|
||||
{
|
||||
/* Construct the transformation that will be applied on each iteration. */
|
||||
const float3x3 transformation = math::from_loc_rot_scale<float3x3>(
|
||||
get_translation(), math::AngleRadian(get_rotation()), get_scale());
|
||||
/* Change the origin of the transformation to the user-specified origin. */
|
||||
const float3x3 origin_transformation = math::from_origin_transform<float3x3>(transformation,
|
||||
get_origin());
|
||||
/* The shader will transform the coordinates, not the image itself, so take the inverse. */
|
||||
return math::invert(origin_transformation);
|
||||
return float2(node_storage(bnode()).center_x, node_storage(bnode()).center_y);
|
||||
}
|
||||
|
||||
/* The actual number of iterations is 2 to the power of the user supplied iterations. The power
|
||||
|
||||
Reference in New Issue
Block a user