Compositor: Add Composite node for new CPU compositor

This patch implements the Composite node for the new CPU compositor.
This is essentially equivalent to the Viewer node commit.
This commit is contained in:
Omar Emara
2024-08-22 14:48:52 +03:00
parent b0390cae09
commit 997ab86906
5 changed files with 173 additions and 84 deletions

View File

@@ -81,9 +81,8 @@ class Context {
* since the region can be zero sized. */
virtual rcti get_compositing_region() const = 0;
/* Get the texture where the result of the compositor should be written. This should be called by
* the composite output node to get its target texture. */
virtual GPUTexture *get_output_texture() = 0;
/* Get the result where the result of the compositor should be written. */
virtual Result get_output_result() = 0;
/* Get the result where the result of the compositor viewer should be written, given the domain
* of the result to be viewed and whether the output is a non-color data image. */

View File

@@ -133,9 +133,12 @@ class Context : public realtime_compositor::Context {
return visible_camera_region;
}
GPUTexture *get_output_texture() override
realtime_compositor::Result get_output_result() override
{
return DRW_viewport_texture_list_get()->color;
realtime_compositor::Result result = this->create_result(
realtime_compositor::ResultType::Color, realtime_compositor::ResultPrecision::Half);
result.wrap_external(DRW_viewport_texture_list_get()->color);
return result;
}
realtime_compositor::Result get_viewer_output_result(realtime_compositor::Domain /*domain*/,

View File

@@ -6,6 +6,7 @@
* \ingroup cmpnodes
*/
#include "BLI_bounds_types.hh"
#include "BLI_math_vector_types.hh"
#include "UI_interface.hh"
@@ -77,103 +78,185 @@ class CompositeOperation : public NodeOperation {
color.w = alpha.get_float_value();
}
GPU_texture_clear(context().get_output_texture(), GPU_DATA_FLOAT, color);
const Domain domain = compute_domain();
Result output = context().get_output_result();
if (this->context().use_gpu()) {
GPU_texture_clear(output, GPU_DATA_FLOAT, color);
}
else {
parallel_for(domain.size, [&](const int2 texel) { output.store_pixel(texel, color); });
}
}
/* Executes when the alpha channel of the image is ignored. */
void execute_ignore_alpha()
{
GPUShader *shader = context().get_shader("compositor_write_output_opaque",
ResultPrecision::Half);
if (context().use_gpu()) {
this->execute_ignore_alpha_gpu();
}
else {
this->execute_ignore_alpha_cpu();
}
}
void execute_ignore_alpha_gpu()
{
const Result &image = get_input("Image");
const Domain domain = compute_domain();
Result output = context().get_output_result();
GPUShader *shader = context().get_shader("compositor_write_output_opaque", output.precision());
GPU_shader_bind(shader);
/* The compositing space might be limited to a subset of the output texture, so only write into
* that compositing region. */
const rcti compositing_region = context().get_compositing_region();
const int2 lower_bound = int2(compositing_region.xmin, compositing_region.ymin);
const int2 upper_bound = int2(compositing_region.xmax, compositing_region.ymax);
GPU_shader_uniform_2iv(shader, "lower_bound", lower_bound);
GPU_shader_uniform_2iv(shader, "upper_bound", upper_bound);
const Bounds<int2> bounds = get_output_bounds();
GPU_shader_uniform_2iv(shader, "lower_bound", bounds.min);
GPU_shader_uniform_2iv(shader, "upper_bound", bounds.max);
const Result &image = get_input("Image");
image.bind_as_texture(shader, "input_tx");
GPUTexture *output_texture = context().get_output_texture();
const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
GPU_texture_image_bind(output_texture, image_unit);
output.bind_as_image(shader, "output_img");
const int2 compositing_region_size = context().get_compositing_region_size();
compute_dispatch_threads_at_least(shader, compositing_region_size);
compute_dispatch_threads_at_least(shader, domain.size);
image.unbind_as_texture();
GPU_texture_image_unbind(output_texture);
output.unbind_as_image();
GPU_shader_unbind();
}
void execute_ignore_alpha_cpu()
{
const Domain domain = compute_domain();
const Result &image = get_input("Image");
Result output = context().get_output_result();
const Bounds<int2> bounds = get_output_bounds();
parallel_for(domain.size, [&](const int2 texel) {
const int2 output_texel = texel + bounds.min;
if (output_texel.x > bounds.max.x || output_texel.y > bounds.max.y) {
return;
}
output.store_pixel(texel + bounds.min, float4(image.load_pixel(texel).xyz(), 1.0f));
});
}
/* Executes when the image texture is written with no adjustments and can thus be copied directly
* to the output texture. */
* to the output. */
void execute_copy()
{
GPUShader *shader = context().get_shader("compositor_write_output", ResultPrecision::Half);
if (context().use_gpu()) {
this->execute_copy_gpu();
}
else {
this->execute_copy_cpu();
}
}
void execute_copy_gpu()
{
const Result &image = get_input("Image");
const Domain domain = compute_domain();
Result output = context().get_output_result();
GPUShader *shader = context().get_shader("compositor_write_output", output.precision());
GPU_shader_bind(shader);
/* The compositing space might be limited to a subset of the output texture, so only write into
* that compositing region. */
const rcti compositing_region = context().get_compositing_region();
const int2 lower_bound = int2(compositing_region.xmin, compositing_region.ymin);
const int2 upper_bound = int2(compositing_region.xmax, compositing_region.ymax);
GPU_shader_uniform_2iv(shader, "lower_bound", lower_bound);
GPU_shader_uniform_2iv(shader, "upper_bound", upper_bound);
const Bounds<int2> bounds = get_output_bounds();
GPU_shader_uniform_2iv(shader, "lower_bound", bounds.min);
GPU_shader_uniform_2iv(shader, "upper_bound", bounds.max);
const Result &image = get_input("Image");
image.bind_as_texture(shader, "input_tx");
GPUTexture *output_texture = context().get_output_texture();
const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
GPU_texture_image_bind(output_texture, image_unit);
output.bind_as_image(shader, "output_img");
const int2 compositing_region_size = context().get_compositing_region_size();
compute_dispatch_threads_at_least(shader, compositing_region_size);
compute_dispatch_threads_at_least(shader, domain.size);
image.unbind_as_texture();
GPU_texture_image_unbind(output_texture);
output.unbind_as_image();
GPU_shader_unbind();
}
void execute_copy_cpu()
{
const Domain domain = compute_domain();
const Result &image = get_input("Image");
Result output = context().get_output_result();
const Bounds<int2> bounds = get_output_bounds();
parallel_for(domain.size, [&](const int2 texel) {
const int2 output_texel = texel + bounds.min;
if (output_texel.x > bounds.max.x || output_texel.y > bounds.max.y) {
return;
}
output.store_pixel(texel + bounds.min, image.load_pixel(texel));
});
}
/* Executes when the alpha channel of the image is set as the value of the input alpha. */
void execute_set_alpha()
{
GPUShader *shader = context().get_shader("compositor_write_output_alpha",
ResultPrecision::Half);
if (context().use_gpu()) {
execute_set_alpha_gpu();
}
else {
execute_set_alpha_cpu();
}
}
void execute_set_alpha_gpu()
{
const Result &image = get_input("Image");
const Domain domain = compute_domain();
Result output = context().get_output_result();
GPUShader *shader = context().get_shader("compositor_write_output_alpha", output.precision());
GPU_shader_bind(shader);
/* The compositing space might be limited to a subset of the output texture, so only write into
* that compositing region. */
const rcti compositing_region = context().get_compositing_region();
const int2 lower_bound = int2(compositing_region.xmin, compositing_region.ymin);
const int2 upper_bound = int2(compositing_region.xmax, compositing_region.ymax);
GPU_shader_uniform_2iv(shader, "lower_bound", lower_bound);
GPU_shader_uniform_2iv(shader, "upper_bound", upper_bound);
const Bounds<int2> bounds = get_output_bounds();
GPU_shader_uniform_2iv(shader, "lower_bound", bounds.min);
GPU_shader_uniform_2iv(shader, "upper_bound", bounds.max);
const Result &image = get_input("Image");
image.bind_as_texture(shader, "input_tx");
const Result &alpha = get_input("Alpha");
alpha.bind_as_texture(shader, "alpha_tx");
GPUTexture *output_texture = context().get_output_texture();
const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
GPU_texture_image_bind(output_texture, image_unit);
output.bind_as_image(shader, "output_img");
const int2 compositing_region_size = context().get_compositing_region_size();
compute_dispatch_threads_at_least(shader, compositing_region_size);
compute_dispatch_threads_at_least(shader, domain.size);
image.unbind_as_texture();
alpha.unbind_as_texture();
GPU_texture_image_unbind(output_texture);
output.unbind_as_image();
GPU_shader_unbind();
}
void execute_set_alpha_cpu()
{
const Domain domain = compute_domain();
const Result &image = get_input("Image");
const Result &alpha = get_input("Alpha");
Result output = context().get_output_result();
const Bounds<int2> bounds = get_output_bounds();
parallel_for(domain.size, [&](const int2 texel) {
const int2 output_texel = texel + bounds.min;
if (output_texel.x > bounds.max.x || output_texel.y > bounds.max.y) {
return;
}
output.store_pixel(texel + bounds.min,
float4(image.load_pixel(texel).xyz(), alpha.load_pixel(texel).x));
});
}
/* Returns the bounds of the compositing region. Writes should be restricted to this
 * region, since it might cover only a subset of the output result. */
Bounds<int2> get_output_bounds()
{
const rcti compositing_region = context().get_compositing_region();
return Bounds<int2>(int2(compositing_region.xmin, compositing_region.ymin),
int2(compositing_region.xmax, compositing_region.ymax));
}
/* If true, the alpha channel of the image is set to 1, that is, it becomes opaque. If false, the
* alpha channel of the image is retained, but only if the alpha input is not linked. If the
* alpha input is linked, the value of that input will be used as the alpha of the image. */

View File

@@ -64,7 +64,6 @@ class ViewerOperation : public NodeOperation {
const Result &image = get_input("Image");
const Result &alpha = get_input("Alpha");
if (image.is_single_value() && alpha.is_single_value()) {
execute_clear();
}

View File

@@ -154,10 +154,10 @@ class Context : public realtime_compositor::Context {
/* Input data. */
ContextInputData input_data_;
/* Output combined texture. */
GPUTexture *output_texture_ = nullptr;
/* Output combined result. */
realtime_compositor::Result output_result_;
/* Viewer output texture. */
/* Viewer output result. */
realtime_compositor::Result viewer_output_result_;
/* Cached textures that the compositor took ownership of. */
@@ -167,13 +167,14 @@ class Context : public realtime_compositor::Context {
Context(const ContextInputData &input_data, TexturePool &texture_pool)
: realtime_compositor::Context(texture_pool),
input_data_(input_data),
output_result_(this->create_result(realtime_compositor::ResultType::Color)),
viewer_output_result_(this->create_result(realtime_compositor::ResultType::Color))
{
}
virtual ~Context()
{
GPU_TEXTURE_FREE_SAFE(output_texture_);
output_result_.release();
viewer_output_result_.release();
for (GPUTexture *texture : textures_) {
GPU_texture_free(texture);
@@ -235,24 +236,24 @@ class Context : public realtime_compositor::Context {
return render_region;
}
GPUTexture *get_output_texture() override
realtime_compositor::Result get_output_result() override
{
/* TODO: just a temporary hack, needs to get stored in RenderResult,
* once that supports GPU buffers. */
if (output_texture_ == nullptr) {
const int2 size = get_render_size();
output_texture_ = GPU_texture_create_2d(
"compositor_output_texture",
size.x,
size.y,
1,
get_precision() == realtime_compositor::ResultPrecision::Half ? GPU_RGBA16F :
GPU_RGBA32F,
GPU_TEXTURE_USAGE_GENERAL,
nullptr);
const int2 render_size = get_render_size();
if (output_result_.is_allocated()) {
/* If the allocated result has the same size as the render size, return it as is. */
if (render_size == output_result_.domain().size) {
return output_result_;
}
else {
/* Otherwise, the size changed, so release its data and reset it, then reallocate it
 * below with the new render size. */
output_result_.release();
output_result_.reset();
}
}
return output_texture_;
output_result_.allocate_texture(render_size, false);
return output_result_;
}
realtime_compositor::Result get_viewer_output_result(realtime_compositor::Domain domain,
@@ -446,7 +447,7 @@ class Context : public realtime_compositor::Context {
void output_to_render_result()
{
if (!output_texture_) {
if (!output_result_.is_allocated()) {
return;
}
@@ -455,18 +456,22 @@ class Context : public realtime_compositor::Context {
if (rr) {
RenderView *rv = RE_RenderViewGetByName(rr, input_data_.view_name.c_str());
ImBuf *ibuf = RE_RenderViewEnsureImBuf(rr, rv);
rr->have_combined = true;
GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
float *output_buffer = (float *)GPU_texture_read(output_texture_, GPU_DATA_FLOAT, 0);
if (output_buffer) {
ImBuf *ibuf = RE_RenderViewEnsureImBuf(rr, rv);
if (this->use_gpu()) {
GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
float *output_buffer = static_cast<float *>(
GPU_texture_read(output_result_, GPU_DATA_FLOAT, 0));
IMB_assign_float_buffer(ibuf, output_buffer, IB_TAKE_OWNERSHIP);
}
/* TODO: z-buffer output. */
rr->have_combined = true;
else {
float *data = static_cast<float *>(
MEM_malloc_arrayN(rr->rectx * rr->recty, 4 * sizeof(float), __func__));
IMB_assign_float_buffer(ibuf, data, IB_TAKE_OWNERSHIP);
std::memcpy(
data, output_result_.float_texture(), rr->rectx * rr->recty * 4 * sizeof(float));
}
}
if (re) {