Compositor: Implement Cryptomatte for new CPU compositor

Reference #125968.
2024-11-19 12:47:23 +02:00
parent 8528407a18
commit 54fa1f9bb7
1 changed files with 184 additions and 44 deletions
--- a/source/blender/nodes/composite/nodes/node_composite_cryptomatte.cc
+++ b/source/blender/nodes/composite/nodes/node_composite_cryptomatte.cc
@@ -6,6 +6,7 @@
 * \ingroup cmpnodes
 */

+#include <cstring>
 #include <string>

 #include <fmt/format.h>
@@ -236,7 +237,7 @@ class BaseCryptoMatteOperation : public NodeOperation {
  virtual Result &get_input_image() = 0;

  /* Should returns all the Cryptomatte layers in order. */
-  virtual Vector<GPUTexture *> get_layers() = 0;
+  virtual Vector<Result> get_layers() = 0;

  /* If only a subset area of the Cryptomatte layers is to be considered, this method should return
   * the lower bound of that area. The upper bound will be derived from the operation domain. */
@@ -247,18 +248,7 @@ class BaseCryptoMatteOperation : public NodeOperation {

  void execute() override
  {
-    /* Not yet supported on CPU. */
-    if (!context().use_gpu()) {
-      for (const bNodeSocket *output : this->node()->output_sockets()) {
-        Result &output_result = get_result(output->identifier);
-        if (output_result.should_compute()) {
-          output_result.allocate_invalid();
-        }
-      }
-      return;
-    }
-
-    Vector<GPUTexture *> layers = get_layers();
+    Vector<Result> layers = get_layers();
    if (layers.is_empty()) {
      allocate_invalid();
      return;
@@ -309,41 +299,105 @@ class BaseCryptoMatteOperation : public NodeOperation {

  /* Computes the pick result, which is a special human-viewable image that the user can pick
   * entities from using the Cryptomatte picker operator. See the shader for more information. */
-  void compute_pick(Vector<GPUTexture *> &layers)
+  void compute_pick(const Vector<Result> &layers)
  {
-    /* See the comment below for why full precision is necessary. */
+    Result &output_pick = get_result("Pick");
+    /* Promote to full precision since it stores the identifiers of the first Cryptomatte rank,
+     * which is a 32-bit float. See the shader for more information. */
+    output_pick.set_precision(ResultPrecision::Full);
+    /* Inform viewers that the pick result should not be color managed. */
+    output_pick.meta_data.is_non_color_data = true;
+
+    if (this->context().use_gpu()) {
+      this->compute_pick_gpu(layers);
+    }
+    else {
+      this->compute_pick_cpu(layers);
+    }
+  }
+
+  void compute_pick_gpu(const Vector<Result> &layers)
+  {
+    /* See this->compute_pick for why full precision is necessary. */
    GPUShader *shader = context().get_shader("compositor_cryptomatte_pick", ResultPrecision::Full);
    GPU_shader_bind(shader);

    const int2 lower_bound = this->get_layers_lower_bound();
    GPU_shader_uniform_2iv(shader, "lower_bound", lower_bound);

-    GPUTexture *first_layer = layers[0];
-    const int input_unit = GPU_shader_get_sampler_binding(shader, "first_layer_tx");
-    GPU_texture_bind(first_layer, input_unit);
-
-    Result &output_pick = get_result("Pick");
-
-    /* Promote to full precision since it stores the identifiers of the first Cryptomatte rank,
-     * which is a 32-bit float. See the shader for more information. */
-    output_pick.set_precision(ResultPrecision::Full);
-
-    output_pick.meta_data.is_non_color_data = true;
+    const Result &first_layer = layers[0];
+    first_layer.bind_as_texture(shader, "first_layer_tx");

    const Domain domain = compute_domain();
+    Result &output_pick = get_result("Pick");
    output_pick.allocate_texture(domain);
    output_pick.bind_as_image(shader, "output_img");

    compute_dispatch_threads_at_least(shader, domain.size);

    GPU_shader_unbind();
-    GPU_texture_unbind(first_layer);
+    first_layer.unbind_as_texture();
    output_pick.unbind_as_image();
  }

+  void compute_pick_cpu(const Vector<Result> &layers)
+  {
+    const int2 lower_bound = this->get_layers_lower_bound();
+
+    const Result &first_layer = layers[0];
+
+    const Domain domain = this->compute_domain();
+    Result &output = this->get_result("Pick");
+    output.allocate_texture(domain);
+
+    /* Blender provides a Cryptomatte picker operator (UI_OT_eyedropper_color) that can pick a
+     * Cryptomatte entity from an image. That image is a specially encoded image that the picker
+     * operator can understand. In particular, its red channel is the identifier of the entity in
+     * the first rank, while the green and blue channels are arbitrary [0, 1] compressed versions
+     * of the identifier to make the image more humane-viewable, but they are actually ignored by
+     * the picker operator, as can be seen in functions like eyedropper_color_sample_text_update,
+     * where only the red channel is considered.
+     *
+     * This shader just computes this special image given the first Cryptomatte layer. The output
+     * needs to be in full precision since the identifier is a 32-bit float.
+     *
+     * This is the same concept as the "keyable" image described in section "Matte Extraction:
+     * Implementation Details" in the original Cryptomatte publication:
+     *
+     *   Friedman, Jonah, and Andrew C. Jones. "Fully automatic id mattes with support for motion
+     * blur and transparency." ACM SIGGRAPH 2015 Posters. 2015. 1-1.
+     *
+     * Except we put the identifier in the red channel by convention instead of the suggested blue
+     * channel. */
+    parallel_for(domain.size, [&](const int2 texel) {
+      /* Each layer stores two ranks, each rank contains a pair, the identifier and the coverage of
+       * the entity identified by the identifier. */
+      float2 first_rank = first_layer.load_pixel(texel + lower_bound).xy();
+      float id_of_first_rank = first_rank.x;
+
+      /* There is no logic to this, we just compute arbitrary compressed versions of the identifier
+       * in the [0, 1] range to make the image more human-viewable. */
+      uint32_t hash_value;
+      std::memcpy(&hash_value, &id_of_first_rank, sizeof(uint32_t));
+      float green = float(hash_value << 8) / float(0xFFFFFFFFu);
+      float blue = float(hash_value << 16) / float(0xFFFFFFFFu);
+
+      output.store_pixel(texel, float4(id_of_first_rank, green, blue, 1.0f));
+    });
+  }
+
  /* Computes and returns the matte by accumulating the coverage of all entities whose identifiers
   * are selected by the user, across all layers. See the shader for more information. */
-  Result compute_matte(Vector<GPUTexture *> &layers)
+  Result compute_matte(const Vector<Result> &layers)
+  {
+    if (this->context().use_gpu()) {
+      return this->compute_matte_gpu(layers);
+    }
+
+    return this->compute_matte_cpu(layers);
+  }
+
+  Result compute_matte_gpu(const Vector<Result> &layers)
  {
    const Domain domain = compute_domain();
    Result output_matte = context().create_result(ResultType::Float);
@@ -367,16 +421,15 @@ class BaseCryptoMatteOperation : public NodeOperation {
    GPU_shader_uniform_1i(shader, "identifiers_count", identifiers.size());
    GPU_shader_uniform_1f_array(shader, "identifiers", identifiers.size(), identifiers.data());

-    for (GPUTexture *layer : layers) {
-      const int input_unit = GPU_shader_get_sampler_binding(shader, "layer_tx");
-      GPU_texture_bind(layer, input_unit);
+    for (const Result &layer : layers) {
+      layer.bind_as_texture(shader, "layer_tx");

      /* Bind the matte with read access, since we will be accumulating in it. */
      output_matte.bind_as_image(shader, "matte_img", true);

      compute_dispatch_threads_at_least(shader, domain.size);

-      GPU_texture_unbind(layer);
+      layer.unbind_as_texture();
      output_matte.unbind_as_image();
    }

@@ -385,8 +438,78 @@ class BaseCryptoMatteOperation : public NodeOperation {
    return output_matte;
  }

+  Result compute_matte_cpu(const Vector<Result> &layers)
+  {
+    const Domain domain = compute_domain();
+    Result matte = context().create_result(ResultType::Float);
+    matte.allocate_texture(domain);
+
+    /* Clear the matte to zero to ready it to accumulate the coverage. */
+    parallel_for(domain.size, [&](const int2 texel) { matte.store_pixel(texel, float4(0.0f)); });
+
+    Vector<float> identifiers = get_identifiers();
+    /* The user haven't selected any entities, return the currently zero matte. */
+    if (identifiers.is_empty()) {
+      return matte;
+    }
+
+    const int2 lower_bound = this->get_layers_lower_bound();
+    for (const Result &layer_result : layers) {
+      /* Loops over all identifiers selected by the user, and accumulate the coverage of ranks
+       * whose identifiers match that of the user selected identifiers.
+       *
+       * This is described in section "Matte Extraction: Implementation Details" in the original
+       * Cryptomatte publication:
+       *
+       *   Friedman, Jonah, and Andrew C. Jones. "Fully automatic id mattes with support for motion
+       * blur and transparency." ACM SIGGRAPH 2015 Posters. 2015. 1-1.
+       */
+      parallel_for(domain.size, [&](const int2 texel) {
+        float4 layer = layer_result.load_pixel(texel + lower_bound);
+
+        /* Each Cryptomatte layer stores two ranks. */
+        float2 first_rank = layer.xy();
+        float2 second_rank = layer.zw();
+
+        /* Each Cryptomatte rank stores a pair of an identifier and the coverage of the entity
+         * identified by that identifier. */
+        float identifier_of_first_rank = first_rank.x;
+        float coverage_of_first_rank = first_rank.y;
+        float identifier_of_second_rank = second_rank.x;
+        float coverage_of_second_rank = second_rank.y;
+
+        /* Loop over all identifiers selected by the user, if the identifier of either of the ranks
+         * match it, accumulate its coverage. */
+        float total_coverage = 0.0f;
+        for (const float &identifier : identifiers) {
+          if (identifier_of_first_rank == identifier) {
+            total_coverage += coverage_of_first_rank;
+          }
+          if (identifier_of_second_rank == identifier) {
+            total_coverage += coverage_of_second_rank;
+          }
+        }
+
+        /* Add the total coverage to the coverage accumulated by previous layers. */
+        matte.store_pixel(texel, float4(matte.load_pixel(texel).x + total_coverage));
+      });
+    }
+
+    return matte;
+  }
+
  /* Computes the output image result by pre-multiplying the matte to the image. */
-  void compute_image(Result &matte)
+  void compute_image(const Result &matte)
+  {
+    if (this->context().use_gpu()) {
+      this->compute_image_gpu(matte);
+    }
+    else {
+      this->compute_image_cpu(matte);
+    }
+  }
+
+  void compute_image_gpu(const Result &matte)
  {
    GPUShader *shader = context().get_shader("compositor_cryptomatte_image");
    GPU_shader_bind(shader);
@@ -409,6 +532,23 @@ class BaseCryptoMatteOperation : public NodeOperation {
    image_output.unbind_as_image();
  }

+  void compute_image_cpu(const Result &matte)
+  {
+    Result &input = get_input_image();
+
+    const Domain domain = compute_domain();
+    Result &output = get_result("Image");
+    output.allocate_texture(domain);
+
+    parallel_for(domain.size, [&](const int2 texel) {
+      float4 input_color = input.load_pixel(texel);
+      float input_matte = matte.load_pixel(texel).x;
+
+      /* Premultiply the alpha to the image. */
+      output.store_pixel(texel, input_color * input_matte);
+    });
+  }
+
  /* Get the identifiers of the entities selected by the user to generate a matte from. The
   * identifiers are hashes of the names of the entities encoded in floats. See the "ID Generation"
   * section of the Cryptomatte specification for more information. */
@@ -531,7 +671,7 @@ class CryptoMatteOperation : public BaseCryptoMatteOperation {
  }

  /* Returns all the relevant Cryptomatte layers from the selected source. */
-  Vector<GPUTexture *> get_layers() override
+  Vector<Result> get_layers() override
  {
    switch (get_source()) {
      case CMP_NODE_CRYPTOMATTE_SOURCE_RENDER:
@@ -541,13 +681,13 @@ class CryptoMatteOperation : public BaseCryptoMatteOperation {
    }

    BLI_assert_unreachable();
-    return Vector<GPUTexture *>();
+    return Vector<Result>();
  }

  /* Returns all the relevant Cryptomatte layers from the selected layer. */
-  Vector<GPUTexture *> get_layers_from_render()
+  Vector<Result> get_layers_from_render()
  {
-    Vector<GPUTexture *> layers;
+    Vector<Result> layers;

    Scene *scene = get_scene();
    if (!scene) {
@@ -584,14 +724,14 @@ class CryptoMatteOperation : public BaseCryptoMatteOperation {
      const int cryptomatte_layers_count = int(math::ceil(view_layer->cryptomatte_levels / 2.0f));
      for (int i = 0; i < cryptomatte_layers_count; i++) {
        const std::string pass_name = fmt::format("{}{:02}", cryptomatte_type, i);
-        GPUTexture *pass_texture = context().get_pass(scene, view_layer_index, pass_name.c_str());
+        Result pass_result = context().get_pass(scene, view_layer_index, pass_name.c_str());

        /* If this Cryptomatte layer wasn't found, then all later Cryptomatte layers can't be used
         * even if they were found. */
-        if (!pass_texture) {
+        if (!pass_result.is_allocated()) {
          return layers;
        }
-        layers.append(pass_texture);
+        layers.append(pass_result);
      }

      /* The target view later was processed already, no need to check other view layers. */
@@ -602,9 +742,9 @@ class CryptoMatteOperation : public BaseCryptoMatteOperation {
  }

  /* Returns all the relevant Cryptomatte layers from the selected EXR image. */
-  Vector<GPUTexture *> get_layers_from_image()
+  Vector<Result> get_layers_from_image()
  {
-    Vector<GPUTexture *> layers;
+    Vector<Result> layers;

    Image *image = get_image();
    if (!image || image->type != IMA_TYPE_MULTILAYER) {
@@ -839,9 +979,9 @@ class LegacyCryptoMatteOperation : public BaseCryptoMatteOperation {
    return get_input("image");
  }

-  Vector<GPUTexture *> get_layers() override
+  Vector<Result> get_layers() override
  {
-    Vector<GPUTexture *> layers;
+    Vector<Result> layers;
    /* Add all textures of all inputs except the first input, which is the input image. */
    for (const bNodeSocket *socket : bnode().input_sockets().drop_front(1)) {
      layers.append(get_input(socket->identifier));