Compositor: Implement CPU domain realization

This patch implements the domain realization algorithm for the new CPU
compositor. Only nearest interpolation with no wrapping is implemented
at the moment.

A new sampling method was added to the result class and some relevant
methods were moved into inline functions.
This commit is contained in:
Omar Emara
2024-10-11 12:12:24 +03:00
parent 7f48c931a4
commit 317cf37680
3 changed files with 179 additions and 103 deletions

View File

@@ -4,7 +4,10 @@
#pragma once
#include "BLI_assert.h"
#include "BLI_math_interp.hh"
#include "BLI_math_matrix_types.hh"
#include "BLI_math_vector.h"
#include "BLI_math_vector_types.hh"
#include "GPU_shader.hh"
@@ -363,7 +366,7 @@ class Result {
const Domain &domain() const;
/* Returns a pointer to the allocated float data. */
float *float_texture();
float *float_texture() const;
/* Loads the float pixel at the given texel coordinates and returns it in a float4. If the number
* of channels in the result are less than 4, then the rest of the returned float4 will have its
@@ -377,6 +380,12 @@ class Result {
* float4 will be ignored. This is similar to how the imageStore function in GLSL works. */
void store_pixel(const int2 &texel, const float4 &pixel_value);
/* Equivalent to the GLSL texture() function with nearest interpolation and zero boundary
 * conditions. The coordinates are thus expected to have half-pixel offsets. A float4 is always
 * returned regardless of the number of channels of the buffer, the remaining channels will be
 * initialized with the template float4(0, 0, 0, 1). */
float4 sample_nearest_zero(const float2 coordinates) const;
private:
/* Allocates the texture data for the given size, either on the GPU or CPU based on the result's
* context. See the allocate_texture method for information about the from_pool argument. */
@@ -392,4 +401,99 @@ class Result {
void copy_pixel(float *target, const float *source) const;
};
/* -------------------------------------------------------------------- */
/* Inline Methods.
*/
inline float4 Result::sample_nearest_zero(const float2 coordinates) const
{
  /* Channels beyond the result's channel count keep the template value (0, 0, 0, 1). */
  float4 sampled_value = float4(0.0f, 0.0f, 0.0f, 1.0f);

  /* Single values are constant over the whole domain, so the coordinates are irrelevant. */
  if (is_single_value_) {
    this->copy_pixel(sampled_value, float_texture_);
    return sampled_value;
  }

  /* Map the normalized [0, 1] coordinates into texel space before interpolating. */
  const int2 image_size = domain_.size;
  const float2 texel_coordinates = float2(image_size) * coordinates;

  math::interpolate_nearest_border_fl(this->float_texture(),
                                      sampled_value,
                                      image_size.x,
                                      image_size.y,
                                      this->channels_count(),
                                      texel_coordinates.x,
                                      texel_coordinates.y);

  return sampled_value;
}
/* Returns the domain of the result. */
inline const Domain &Result::domain() const
{
  return domain_;
}
/* Returns a pointer to the CPU-side float data. Only valid for results stored as floats on the
 * CPU, which is asserted. */
inline float *Result::float_texture() const
{
  BLI_assert(storage_type_ == ResultStorageType::FloatCPU);
  return float_texture_;
}
/* Loads the pixel at the given texel coordinates. Channels beyond the result's channel count
 * keep the template value (0, 0, 0, 1). Single values ignore the texel coordinates. */
inline float4 Result::load_pixel(const int2 &texel) const
{
  float4 loaded_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
  const float *source_pixel = is_single_value_ ? float_texture_ : this->get_float_pixel(texel);
  this->copy_pixel(loaded_value, source_pixel);
  return loaded_value;
}
/* Stores the given pixel value at the given texel coordinates, writing only as many channels as
 * the result has. */
inline void Result::store_pixel(const int2 &texel, const float4 &pixel_value)
{
  this->copy_pixel(this->get_float_pixel(texel), pixel_value);
}
/* Returns the number of float channels a pixel of this result occupies. */
inline int64_t Result::channels_count() const
{
  switch (type_) {
    case ResultType::Vector:
    case ResultType::Color:
      return 4;
    case ResultType::Float3:
      return 3;
    case ResultType::Float2:
    case ResultType::Int2:
      return 2;
    case ResultType::Float:
      return 1;
  }

  /* Unreachable fallback to silence compiler warnings. */
  return 4;
}
/* Returns a pointer to the start of the pixel at the given texel coordinates, assuming row-major
 * storage of tightly packed pixels of channels_count() floats each. */
inline float *Result::get_float_pixel(const int2 &texel) const
{
  return float_texture_ + (texel.y * domain_.size.x + texel.x) * this->channels_count();
}
inline void Result::copy_pixel(float *target, const float *source) const
{
switch (type_) {
case ResultType::Float:
*target = *source;
break;
case ResultType::Float2:
case ResultType::Int2:
copy_v2_v2(target, source);
break;
case ResultType::Float3:
copy_v3_v3(target, source);
break;
case ResultType::Vector:
case ResultType::Color:
copy_v4_v4(target, source);
break;
}
}
} // namespace blender::realtime_compositor

View File

@@ -61,42 +61,16 @@ static const char *get_realization_shader(Result &input,
return nullptr;
}
void realize_on_domain(Context &context,
Result &input,
Result &output,
const Domain &domain,
const float3x3 &input_transformation,
const RealizationOptions &realization_options)
static void realize_on_domain_gpu(Context &context,
Result &input,
Result &output,
const Domain &domain,
const float3x3 &inverse_transformation,
const RealizationOptions &realization_options)
{
const Domain input_domain = Domain(input.domain().size, input_transformation);
if (input_domain == domain) {
input.pass_through(output);
output.set_transformation(domain.transformation);
return;
}
GPUShader *shader = context.get_shader(get_realization_shader(input, realization_options));
GPU_shader_bind(shader);
/* Translation from lower-left corner to center of input space. */
float2 input_translate(-float2(input_domain.size) / 2.0f);
/* Bias translations in case of nearest interpolation to avoid the round-to-even behavior of
 * some GPUs at pixel boundaries. */
if (realization_options.interpolation == Interpolation::Nearest) {
input_translate += std::numeric_limits<float>::epsilon() * 10e3f;
}
/* Transformation from input domain with 0,0 in lower-left to virtual compositing space. */
const float3x3 in_transformation = math::translate(input_transformation, input_translate);
/* Transformation from output domain with 0,0 in lower-left to virtual compositing space. */
const float3x3 out_transformation = math::translate(domain.transformation,
-float2(domain.size) / 2.0f);
/* Concatenate to get full transform from output space to input space */
const float3x3 inverse_transformation = math::invert(in_transformation) * out_transformation;
GPU_shader_uniform_mat3_as_mat4(shader, "inverse_transformation", inverse_transformation.ptr());
/* The texture sampler should use bilinear interpolation for both the bilinear and bicubic
@@ -127,4 +101,72 @@ void realize_on_domain(Context &context,
GPU_shader_unbind();
}
/* Realizes the input on the given domain on the CPU by evaluating the input sampler at each
 * output pixel transformed by the given inverse transformation. Only nearest interpolation with
 * zero boundary conditions is supported for now. */
static void realize_on_domain_cpu(Result &input,
                                  Result &output,
                                  const Domain &domain,
                                  const float3x3 &inverse_transformation)
{
  output.allocate_texture(domain);

  /* The input size is invariant across pixels, so compute it once outside of the parallel loop
   * instead of querying the input domain for every pixel. */
  const float2 input_size = float2(input.domain().size);

  parallel_for(domain.size, [&](const int2 texel) {
    /* Add 0.5 to evaluate the input sampler at the center of the pixel. */
    float2 coordinates = float2(texel) + float2(0.5f);

    /* Transform the input image by transforming the domain coordinates with the inverse of input
     * image's transformation. The inverse transformation is an affine matrix and thus the
     * coordinates should be in homogeneous coordinates. */
    coordinates = (inverse_transformation * float3(coordinates, 1.0f)).xy();

    /* Divide by the input image size to get the coordinates into the sampler's expected [0, 1]
     * range. */
    const float2 normalized_coordinates = coordinates / input_size;

    /* TODO: Support other interpolations and wrapping modes. */
    output.store_pixel(texel, input.sample_nearest_zero(normalized_coordinates));
  });
}
/* Realizes the input on the given domain, writing the result into the output. If the input
 * domain already matches the target domain, the input is passed through unchanged. Otherwise,
 * the full output-space to input-space transformation is computed and the realization is
 * dispatched to the GPU or CPU implementation based on the context. */
void realize_on_domain(Context &context,
                       Result &input,
                       Result &output,
                       const Domain &domain,
                       const float3x3 &input_transformation,
                       const RealizationOptions &realization_options)
{
  const Domain input_domain = Domain(input.domain().size, input_transformation);
  /* The input already has the target domain, so no realization is needed. */
  if (input_domain == domain) {
    input.pass_through(output);
    output.set_transformation(domain.transformation);
    return;
  }
  /* Translation from lower-left corner to center of input space. */
  float2 input_translate(-float2(input_domain.size) / 2.0f);
  /* Bias translations in case of nearest interpolation to avoid the round-to-even behavior of
   * some GPUs at pixel boundaries. */
  if (realization_options.interpolation == Interpolation::Nearest) {
    input_translate += std::numeric_limits<float>::epsilon() * 10e3f;
  }
  /* Transformation from input domain with 0,0 in lower-left to virtual compositing space. */
  const float3x3 in_transformation = math::translate(input_transformation, input_translate);
  /* Transformation from output domain with 0,0 in lower-left to virtual compositing space. */
  const float3x3 out_transformation = math::translate(domain.transformation,
                                                      -float2(domain.size) / 2.0f);
  /* Concatenate to get full transform from output space to input space */
  const float3x3 inverse_transformation = math::invert(in_transformation) * out_transformation;
  if (context.use_gpu()) {
    realize_on_domain_gpu(
        context, input, output, domain, inverse_transformation, realization_options);
  }
  else {
    realize_on_domain_cpu(input, output, domain, inverse_transformation);
  }
}
} // namespace blender::realtime_compositor

View File

@@ -709,34 +709,6 @@ int Result::reference_count() const
return reference_count_;
}
/* Returns the domain of the result. */
const Domain &Result::domain() const
{
  return domain_;
}
/* Returns a pointer to the CPU-side float data. Only valid for results stored as floats on the
 * CPU, which is asserted. */
float *Result::float_texture()
{
  BLI_assert(storage_type_ == ResultStorageType::FloatCPU);
  return float_texture_;
}
/* Loads the pixel at the given texel coordinates. Channels beyond the result's channel count
 * keep the template value (0, 0, 0, 1). Single values ignore the texel coordinates. */
float4 Result::load_pixel(const int2 &texel) const
{
  float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
  if (is_single_value_) {
    this->copy_pixel(pixel_value, float_texture_);
  }
  else {
    this->copy_pixel(pixel_value, this->get_float_pixel(texel));
  }
  return pixel_value;
}
/* Stores the given pixel value at the given texel coordinates, writing only as many channels as
 * the result has. */
void Result::store_pixel(const int2 &texel, const float4 &pixel_value)
{
  this->copy_pixel(this->get_float_pixel(texel), pixel_value);
}
void Result::allocate_data(int2 size, bool from_pool)
{
if (context_->use_gpu()) {
@@ -774,46 +746,4 @@ void Result::allocate_data(int2 size, bool from_pool)
}
}
/* Returns the number of float channels a pixel of this result occupies. */
int64_t Result::channels_count() const
{
  switch (type_) {
    case ResultType::Float:
      return 1;
    case ResultType::Float2:
    case ResultType::Int2:
      return 2;
    case ResultType::Float3:
      return 3;
    case ResultType::Vector:
    case ResultType::Color:
      return 4;
  }
  /* Unreachable fallback to silence compiler warnings. */
  return 4;
}
/* Returns a pointer to the start of the pixel at the given texel coordinates, assuming row-major
 * storage of tightly packed pixels of channels_count() floats each. */
float *Result::get_float_pixel(const int2 &texel) const
{
  return float_texture_ + (texel.y * domain_.size.x + texel.x) * this->channels_count();
}
/* Copies a single pixel from source to target, copying only as many float channels as the
 * result's type requires. */
void Result::copy_pixel(float *target, const float *source) const
{
  switch (type_) {
    case ResultType::Float:
      *target = *source;
      break;
    case ResultType::Float2:
    case ResultType::Int2:
      copy_v2_v2(target, source);
      break;
    case ResultType::Float3:
      copy_v3_v3(target, source);
      break;
    case ResultType::Vector:
    case ResultType::Color:
      copy_v4_v4(target, source);
      break;
  }
}
} // namespace blender::realtime_compositor