Compositor: Add Convolve node

This patch adds a simple Convolve node to the compositor. Pull Request: https://projects.blender.org/blender/blender/pulls/144619
2025-08-28 15:58:39 +02:00
parent 90a55a3e0b
commit fdf95b53fa
9 changed files with 473 additions and 2 deletions
--- a/scripts/startup/bl_ui/node_add_menu_compositor.py
+++ b/scripts/startup/bl_ui/node_add_menu_compositor.py
@@ -139,6 +139,7 @@ class NODE_MT_category_compositor_filter(Menu):
        layout.menu("NODE_MT_category_compositor_filter_blur")
        layout.separator()
        node_add_menu.add_node_type(layout, "CompositorNodeAntiAliasing")
+        node_add_menu.add_node_type(layout, "CompositorNodeConvolve")
        node_add_menu.add_node_type(layout, "CompositorNodeDenoise")
        node_add_menu.add_node_type(layout, "CompositorNodeDespeckle")
        layout.separator()
--- a/source/blender/compositor/CMakeLists.txt
+++ b/source/blender/compositor/CMakeLists.txt
@@ -69,6 +69,7 @@ set(SRC
  intern/utilities.cc

  algorithms/intern/compute_preview.cc
+  algorithms/intern/convolve.cc
  algorithms/intern/deriche_gaussian_blur.cc
  algorithms/intern/extract_alpha.cc
  algorithms/intern/jump_flooding.cc
@@ -86,6 +87,7 @@ set(SRC
  algorithms/intern/van_vliet_gaussian_blur.cc

  algorithms/COM_algorithm_compute_preview.hh
+  algorithms/COM_algorithm_convolve.hh
  algorithms/COM_algorithm_deriche_gaussian_blur.hh
  algorithms/COM_algorithm_extract_alpha.hh
  algorithms/COM_algorithm_jump_flooding.hh
--- a/source/blender/compositor/COM_result.hh
+++ b/source/blender/compositor/COM_result.hh
@@ -238,7 +238,12 @@ class Result {
  /* Creates and allocates a new result that matches the type and precision of this result and
   * uploads the CPU data that exist in this result. The result is assumed to be allocated on the
   * CPU. See the allocate_data method for more information on the from_pool parameters. */
-  Result upload_to_gpu(const bool from_pool);
+  Result upload_to_gpu(const bool from_pool) const;
+
+  /* Creates and allocates a new result that matches the type and precision of this result and
+   * downloads the GPU data that exist in this result. The result is assumed to be allocated on the
+   * GPU. */
+  Result download_to_cpu() const;

  /* Bind the GPU texture of the result to the texture image unit with the given name in the
   * currently bound given shader. This also inserts a memory barrier for texture fetches to ensure
@@ -273,6 +278,10 @@ class Result {
   * actual output of the operation. See the uses of the method for a practical example of use. */
  void steal_data(Result &source);

+  /* Similar to the Result variant of steal_data, but steals from a raw data buffer. The buffer is
+   * assumed to be allocated using Blender's guarded allocator. */
+  void steal_data(void *data, int2 size);
+
  /* Set up the result to wrap an external GPU texture that is not allocated nor managed by the
   * result. The is_external_ member will be set to true, the domain will be set to have the same
   * size as the texture, and the texture will be set to the given texture. See the is_external_
@@ -351,6 +360,9 @@ class Result {
  /* Computes the number of channels of the result based on its type. */
  int64_t channels_count() const;

+  /* Computes the size of the result's data in bytes. */
+  int64_t size_in_bytes() const;
+
  blender::gpu::Texture *gpu_texture() const;

  GSpan cpu_data() const;
--- a/source/blender/compositor/algorithms/COM_algorithm_convolve.hh
+++ b/source/blender/compositor/algorithms/COM_algorithm_convolve.hh
@@ -0,0 +1,22 @@
+/* SPDX-FileCopyrightText: 2025 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+#include "COM_context.hh"
+#include "COM_result.hh"
+
+namespace blender::compositor {
+
+/* Convolves the given color input by the given float or color kernel and writes the result to the
+ * given output. If normalize_kernel is true, the kernel will be normalized such that it integrates
+ * to 1. The output will be allocated internally and is thus expected not to be previously
+ * allocated. */
+void convolve(Context &context,
+              const Result &input,
+              const Result &kernel,
+              Result &output,
+              const bool normalize_kernel);
+
+}  // namespace blender::compositor
--- a/source/blender/compositor/algorithms/intern/convolve.cc
+++ b/source/blender/compositor/algorithms/intern/convolve.cc
@@ -0,0 +1,274 @@
+/* SPDX-FileCopyrightText: 2025 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <complex>
+#include <numeric>
+
+#include "BLI_array.hh"
+#include "BLI_assert.h"
+#include "BLI_enumerable_thread_specific.hh"
+#include "BLI_fftw.hh"
+#include "BLI_index_range.hh"
+#include "BLI_memory_utils.hh"
+#include "BLI_task.hh"
+
+#if defined(WITH_FFTW3)
+#  include <fftw3.h>
+#endif
+
+#include "COM_context.hh"
+#include "COM_result.hh"
+#include "COM_utilities.hh"
+
+#include "COM_algorithm_convolve.hh"
+
+namespace blender::compositor {
+
+/* Convolves the color input with the float or color kernel using FFTs computed on the CPU and
+ * stores the result in the output, which is expected not to be allocated. For GPU contexts, the
+ * input and kernel are downloaded to the CPU and the result is uploaded back to the GPU. If
+ * normalize_kernel is true, the result is divided by the sum of the kernel, where a zero sum is
+ * treated as one. If Blender is built without FFTW, the input is copied to the output as is. */
+void convolve(Context &context,
+              const Result &input,
+              const Result &kernel,
+              Result &output,
+              const bool normalize_kernel)
+{
+#if defined(WITH_FFTW3)
+  BLI_assert(input.type() == ResultType::Color);
+  BLI_assert(kernel.type() == ResultType::Float || kernel.type() == ResultType::Color);
+  BLI_assert(output.type() == ResultType::Color);
+
+  /* Since we will be doing a circular convolution, we need to zero pad the input image by the
+   * kernel size and vice versa to avoid the kernel affecting the pixels at the other side of
+   * image. The kernel size is limited by the image size since it will have no effect on the image
+   * during convolution. */
+  const int2 image_size = input.domain().size;
+  const int2 kernel_size = kernel.domain().size;
+  const int2 needed_padding_amount = math::max(kernel_size, image_size);
+  const int2 needed_spatial_size = image_size + needed_padding_amount - 1;
+  const int2 spatial_size = fftw::optimal_size_for_real_transform(needed_spatial_size);
+
+  /* The FFTW real to complex transforms utilizes the hermitian symmetry of real transforms and
+   * stores only half the output since the other half is redundant, so we only allocate half of
+   * the first dimension. See Section 4.3.4 Real-data DFT Array Format in the FFTW manual for
+   * more information. */
+  const int2 frequency_size = int2(spatial_size.x / 2 + 1, spatial_size.y);
+
+  constexpr int input_channels_count = 4;
+  const int64_t spatial_pixels_count = int64_t(spatial_size.x) * spatial_size.y;
+  const int64_t frequency_pixels_count = int64_t(frequency_size.x) * frequency_size.y;
+
+  /* A structure to gather all buffers that need to be forward transformed from the real to the
+   * frequency domain. */
+  struct ForwardTransformTask {
+    float *input;
+    std::complex<float> *output;
+  };
+  Vector<ForwardTransformTask> forward_transform_tasks;
+
+  /* Allocate a real buffer and a complex buffer for each of the input channels for the FFT input
+   * and output respectively, then add a forward transform task for it. */
+  Array<float *> image_spatial_domain_channels(input_channels_count);
+  Array<std::complex<float> *> image_frequency_domain_channels(input_channels_count);
+  for (const int channel : image_spatial_domain_channels.index_range()) {
+    image_spatial_domain_channels[channel] = fftwf_alloc_real(spatial_pixels_count);
+    image_frequency_domain_channels[channel] = reinterpret_cast<std::complex<float> *>(
+        fftwf_alloc_complex(frequency_pixels_count));
+    forward_transform_tasks.append(ForwardTransformTask{image_spatial_domain_channels[channel],
+                                                        image_frequency_domain_channels[channel]});
+  }
+
+  BLI_SCOPED_DEFER([&]() {
+    for (const int channel : image_spatial_domain_channels.index_range()) {
+      fftwf_free(image_spatial_domain_channels[channel]);
+      fftwf_free(image_frequency_domain_channels[channel]);
+    }
+  });
+
+  const int kernel_channels_count = kernel.channels_count();
+  const bool is_color_kernel = kernel_channels_count == 4;
+
+  /* Allocate a real buffer and a complex buffer for each of the kernel channels for the FFT input
+   * and output respectively, then add a forward transform task for it. */
+  Array<float *> kernel_spatial_domain_channels(kernel_channels_count);
+  Array<std::complex<float> *> kernel_frequency_domain_channels(kernel_channels_count);
+  for (const int channel : kernel_spatial_domain_channels.index_range()) {
+    kernel_spatial_domain_channels[channel] = fftwf_alloc_real(spatial_pixels_count);
+    kernel_frequency_domain_channels[channel] = reinterpret_cast<std::complex<float> *>(
+        fftwf_alloc_complex(frequency_pixels_count));
+    forward_transform_tasks.append(ForwardTransformTask{
+        kernel_spatial_domain_channels[channel], kernel_frequency_domain_channels[channel]});
+  }
+
+  BLI_SCOPED_DEFER([&]() {
+    for (const int channel : kernel_spatial_domain_channels.index_range()) {
+      fftwf_free(kernel_spatial_domain_channels[channel]);
+      fftwf_free(kernel_frequency_domain_channels[channel]);
+    }
+  });
+
+  /* Create real to complex and complex to real plans to transform the image to and from the
+   * frequency domain.
+   *
+   * Notice that FFTW provides an advanced interface as per Section 4.4.2 Advanced Real-data DFTs
+   * to transform all image channels simultaneously with interleaved pixel layouts. But profiling
+   * showed better performance when running a single plan in parallel for all image channels with a
+   * planar pixel format, so this is what we will be doing.
+   *
+   * The input and output buffers here are dummy buffers and still not initialized, because they
+   * are required by the planner internally for planning and their data will be overwritten. So
+   * make sure not to initialize the buffers before creating the plan. */
+  fftwf_plan forward_plan = fftwf_plan_dft_r2c_2d(
+      spatial_size.y,
+      spatial_size.x,
+      image_spatial_domain_channels[0],
+      reinterpret_cast<fftwf_complex *>(image_frequency_domain_channels[0]),
+      FFTW_ESTIMATE);
+  fftwf_plan backward_plan = fftwf_plan_dft_c2r_2d(
+      spatial_size.y,
+      spatial_size.x,
+      reinterpret_cast<fftwf_complex *>(image_frequency_domain_channels[0]),
+      image_spatial_domain_channels[0],
+      FFTW_ESTIMATE);
+
+  BLI_SCOPED_DEFER([&]() {
+    fftwf_destroy_plan(forward_plan);
+    fftwf_destroy_plan(backward_plan);
+  });
+
+  /* Download GPU results to CPU for GPU contexts. */
+  Result input_cpu = context.use_gpu() ? input.download_to_cpu() : input;
+  Result kernel_cpu = context.use_gpu() ? kernel.download_to_cpu() : kernel;
+
+  /* Only the downloaded copies are ours to release, the CPU inputs belong to the caller. */
+  BLI_SCOPED_DEFER([&]() {
+    if (context.use_gpu()) {
+      input_cpu.release();
+      kernel_cpu.release();
+    }
+  });
+
+  /* Zero pad the image to the required spatial domain size, storing each channel in planar
+   * format for better cache locality, that is, RRRR...GGGG...BBBB...AAAA. */
+  threading::memory_bandwidth_bound_task(spatial_pixels_count * sizeof(float), [&]() {
+    parallel_for(spatial_size, [&](const int2 texel) {
+      const float4 pixel_color = input_cpu.load_pixel_zero<float4>(texel);
+      for (const int channel : IndexRange(input_channels_count)) {
+        float *buffer = image_spatial_domain_channels[channel];
+        const int64_t index = texel.y * int64_t(spatial_size.x) + texel.x;
+        buffer[index] = pixel_color[channel];
+      }
+    });
+  });
+
+  /* Use doubles to sum the kernel since floats are not stable with threaded summation. We always
+   * use a double4 even for float kernels for generality, in that case, only the first component
+   * of the sum is used. */
+  threading::EnumerableThreadSpecific<double4> sum_by_thread([]() { return double4(0.0); });
+
+  /* Compute the kernel while zero padding to match the spatial size. */
+  const int2 kernel_center = kernel_size / 2;
+  parallel_for(spatial_size, [&](const int2 texel) {
+    /* We offset the computed kernel with wrap around such that it is centered at the zero
+     * point, which is the expected format for doing circular convolutions in the frequency
+     * domain. */
+    const int2 centered_texel = kernel_center - texel;
+    const int2 wrapped_texel = int2(mod_i(centered_texel.x, spatial_size.x),
+                                    mod_i(centered_texel.y, spatial_size.y));
+
+    const float4 kernel_value = is_color_kernel ?
+                                    kernel_cpu.load_pixel_zero<float4>(wrapped_texel) :
+                                    float4(kernel_cpu.load_pixel_zero<float>(wrapped_texel));
+    for (const int channel : IndexRange(kernel_channels_count)) {
+      float *buffer = kernel_spatial_domain_channels[channel];
+      buffer[texel.x + texel.y * int64_t(spatial_size.x)] = kernel_value[channel];
+    }
+    sum_by_thread.local() += double4(kernel_value);
+  });
+
+  /* The computed kernel is not normalized and should be normalized, but instead of normalizing the
+   * kernel during computation, we normalize it in the frequency domain when convolving the kernel
+   * to the image since we will be doing sample normalization anyways. This is okay since the
+   * Fourier transform is linear. A zero sum is replaced by one to avoid division by zero. */
+  const float4 sum = float4(
+      std::accumulate(sum_by_thread.begin(), sum_by_thread.end(), double4(0.0)));
+  const float4 sanitized_sum = float4(sum[0] == 0.0f ? 1.0f : sum[0],
+                                      sum[1] == 0.0f ? 1.0f : sum[1],
+                                      sum[2] == 0.0f ? 1.0f : sum[2],
+                                      sum[3] == 0.0f ? 1.0f : sum[3]);
+  const float4 normalization_factor = normalize_kernel ? sanitized_sum : float4(1.0f);
+
+  /* Transform all necessary data from the real domain to the frequency domain. */
+  threading::parallel_for(
+      forward_transform_tasks.index_range(), 1, [&](const IndexRange sub_range) {
+        for (const int64_t i : sub_range) {
+          fftwf_execute_dft_r2c(
+              forward_plan,
+              forward_transform_tasks[i].input,
+              reinterpret_cast<fftwf_complex *>(forward_transform_tasks[i].output));
+        }
+      });
+
+  /* Multiply the kernel and the image in the frequency domain to perform the convolution. The
+   * FFT is not normalized, meaning the result of the FFT followed by an inverse FFT will result
+   * in an image that is scaled by a factor of the product of the width and height, so we take
+   * that into account by dividing by that scale. See Section 4.8.6 Multi-dimensional Transforms
+   * of the FFTW manual for more information. */
+  const float4 normalization_scale = float(spatial_size.x) * spatial_size.y * normalization_factor;
+  threading::parallel_for(IndexRange(frequency_size.y), 1, [&](const IndexRange sub_y_range) {
+    for (const int64_t channel : IndexRange(input_channels_count)) {
+      /* Float kernels only have a single channel that is applied to all image channels. */
+      const int kernel_channel = is_color_kernel ? channel : 0;
+      std::complex<float> *image_buffer = image_frequency_domain_channels[channel];
+      const std::complex<float> *kernel_buffer = kernel_frequency_domain_channels[kernel_channel];
+      for (const int64_t y : sub_y_range) {
+        for (const int64_t x : IndexRange(frequency_size.x)) {
+          const int64_t index = x + y * int64_t(frequency_size.x);
+          image_buffer[index] *= kernel_buffer[index] / normalization_scale[kernel_channel];
+        }
+      }
+    }
+  });
+
+  /* Transform channels from the frequency domain to the real domain. */
+  threading::parallel_for(IndexRange(input_channels_count), 1, [&](const IndexRange sub_range) {
+    for (const int64_t channel : sub_range) {
+      fftwf_execute_dft_c2r(
+          backward_plan,
+          reinterpret_cast<fftwf_complex *>(image_frequency_domain_channels[channel]),
+          image_spatial_domain_channels[channel]);
+    }
+  });
+
+  Result output_cpu = context.create_result(input.type());
+  output_cpu.allocate_texture(input.domain(), true, ResultStorageType::CPU);
+
+  /* Copy the result to the output, discarding the padded area. */
+  threading::memory_bandwidth_bound_task(input.size_in_bytes(), [&]() {
+    parallel_for(image_size, [&](const int2 texel) {
+      float4 color = float4(0.0f);
+      for (const int channel : IndexRange(input_channels_count)) {
+        const int64_t index = texel.x + texel.y * int64_t(spatial_size.x);
+        color[channel] = image_spatial_domain_channels[channel][index];
+      }
+      output_cpu.store_pixel(texel, color);
+    });
+  });
+
+  /* Hand the data over to the output using steal_data in both cases, such that only the data of
+   * the output is replaced. Assigning to the output directly would overwrite the whole result,
+   * including state that was set up by the caller. */
+  if (context.use_gpu()) {
+    Result output_gpu = output_cpu.upload_to_gpu(true);
+    output.steal_data(output_gpu);
+    output_cpu.release();
+  }
+  else {
+    output.steal_data(output_cpu);
+  }
+#else
+  /* Convolution is not available without FFTW, so just copy the input to the output. */
+  output.allocate_texture(input.domain());
+  if (context.use_gpu()) {
+    GPU_texture_copy(output, input);
+  }
+  else {
+    parallel_for(output.domain().size, [&](const int2 texel) {
+      output.store_pixel(texel, input.load_pixel<float4>(texel));
+    });
+  }
+#endif
+}
+
+}  // namespace blender::compositor
--- a/source/blender/compositor/intern/result.cc
+++ b/source/blender/compositor/intern/result.cc
@@ -459,7 +459,7 @@ void Result::allocate_invalid()
  this->allocate_single_value();
 }

-Result Result::upload_to_gpu(const bool from_pool)
+Result Result::upload_to_gpu(const bool from_pool) const
 {
  BLI_assert(storage_type_ == ResultStorageType::CPU);
  BLI_assert(this->is_allocated());
@@ -471,6 +471,19 @@ Result Result::upload_to_gpu(const bool from_pool)
  return result;
 }

+Result Result::download_to_cpu() const
+{
+  BLI_assert(storage_type_ == ResultStorageType::GPU);
+  BLI_assert(this->is_allocated());
+
+  /* Issue a texture update barrier, then read the texture of this result back into a buffer. */
+  GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
+  void *pixels = GPU_texture_read(*this, this->get_gpu_data_format(), 0);
+
+  /* Wrap the buffer in a new result of the same type and precision, which steals the buffer. */
+  Result cpu_result(*context_, this->type(), this->precision());
+  cpu_result.steal_data(pixels, this->domain().size);
+  return cpu_result;
+}
+
 void Result::bind_as_texture(gpu::Shader *shader, const char *texture_name) const
 {
  BLI_assert(storage_type_ == ResultStorageType::GPU);
@@ -538,6 +551,17 @@ void Result::steal_data(Result &source)
  source = Result(*context_, type_, precision_);
 }

+void Result::steal_data(void *data, int2 size)
+{
+  BLI_assert(!this->is_allocated());
+
+  /* Wrap the given buffer as CPU data with one element for each pixel of the given size. */
+  const int64_t pixels_count = int64_t(size.x) * int64_t(size.y);
+  cpu_data_ = GMutableSpan(this->get_cpp_type(), data, pixels_count);
+  domain_ = Domain(size);
+  storage_type_ = ResultStorageType::CPU;
+
+  /* Start a new data reference count with this result as the only user. */
+  data_reference_count_ = new int(1);
+}
+
 /* Returns true if the given GPU texture is compatible with the type and precision of the given
 * result. */
 [[maybe_unused]] static bool is_compatible_texture(const blender::gpu::Texture *texture,
@@ -753,6 +777,16 @@ int Result::reference_count() const
  return reference_count_;
 }

+int64_t Result::size_in_bytes() const
+{
+  /* Single values occupy a single element, otherwise there is one element per pixel. */
+  const int64_t bytes_per_pixel = this->get_cpp_type().size;
+  if (!this->is_single_value()) {
+    const int2 size = this->domain().size;
+    return bytes_per_pixel * size.x * size.y;
+  }
+  return bytes_per_pixel;
+}
+
 GPointer Result::single_value() const
 {
  return std::visit([](const auto &value) { return GPointer(&value); }, single_value_);
--- a/source/blender/makesrna/intern/rna_nodetree.cc
+++ b/source/blender/makesrna/intern/rna_nodetree.cc
@@ -9907,6 +9907,7 @@ static void rna_def_nodes(BlenderRNA *brna)
  define("CompositorNode", "CompositorNodeColorCorrection");
  define("CompositorNode", "CompositorNodeColorMatte");
  define("CompositorNode", "CompositorNodeColorSpill", def_cmp_color_spill);
+  define("CompositorNode", "CompositorNodeConvolve");
  define("CompositorNode", "CompositorNodeCombHSVA");
  define("CompositorNode", "CompositorNodeCombineColor", def_cmp_combsep_color);
  define("CompositorNode", "CompositorNodeCombRGBA");
--- a/source/blender/nodes/composite/CMakeLists.txt
+++ b/source/blender/nodes/composite/CMakeLists.txt
@@ -37,6 +37,7 @@ set(SRC
  nodes/node_composite_color_spill.cc
  nodes/node_composite_colorbalance.cc
  nodes/node_composite_colorcorrection.cc
+  nodes/node_composite_convolve.cc
  nodes/node_composite_common.cc
  nodes/node_composite_convert_color_space.cc
  nodes/node_composite_cornerpin.cc
--- a/source/blender/nodes/composite/nodes/node_composite_convolve.cc
+++ b/source/blender/nodes/composite/nodes/node_composite_convolve.cc
@@ -0,0 +1,124 @@
+/* SPDX-FileCopyrightText: 2025 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <cstdint>
+
+#include "COM_algorithm_convolve.hh"
+#include "COM_node_operation.hh"
+
+#include "node_composite_util.hh"
+
+namespace blender::nodes::node_composite_convolve_cc {
+
+/* The data type of the kernel, which selects the kernel input that is used by the node. */
+enum class KernelDataType : uint8_t {
+  Float = 0,
+  Color = 1,
+};
+
+/* The items of the Kernel Data Type menu input, one for each KernelDataType value. */
+static const EnumPropertyItem kernel_data_type_items[] = {
+    {
+        int(KernelDataType::Float),
+        "FLOAT",
+        0,
+        "Float",
+        "The kernel is a float and will be convolved with all input channels",
+    },
+    {
+        int(KernelDataType::Color),
+        "COLOR",
+        0,
+        "Color",
+        "The kernel is a color and each channel of the kernel will be convolved with each respective "
+        "channel in the input",
+    },
+    {0, nullptr, 0, nullptr, nullptr},
+};
+
+static void node_declare(NodeDeclarationBuilder &b)
+{
+  /* The menu values that each of the two kernel inputs is tied to. */
+  const int float_kernel_menu_value = int(KernelDataType::Float);
+  const int color_kernel_menu_value = int(KernelDataType::Color);
+
+  b.add_input<decl::Color>("Image").hide_value().structure_type(StructureType::Dynamic);
+
+  b.add_input<decl::Menu>("Kernel Data Type")
+      .default_value(KernelDataType::Float)
+      .static_items(kernel_data_type_items);
+
+  /* Both kernel inputs share the "Kernel" name but have distinct identifiers. */
+  b.add_input<decl::Float>("Kernel", "Float Kernel")
+      .hide_value()
+      .structure_type(StructureType::Dynamic)
+      .usage_by_single_menu(float_kernel_menu_value)
+      .compositor_realization_mode(CompositorInputRealizationMode::Transforms);
+  b.add_input<decl::Color>("Kernel", "Color Kernel")
+      .hide_value()
+      .structure_type(StructureType::Dynamic)
+      .usage_by_single_menu(color_kernel_menu_value)
+      .compositor_realization_mode(CompositorInputRealizationMode::Transforms);
+
+  b.add_input<decl::Bool>("Normalize Kernel")
+      .default_value(true)
+      .description("Normalizes the kernel such that it integrates to one");
+
+  b.add_output<decl::Color>("Image").structure_type(StructureType::Dynamic);
+}
+
+using namespace blender::compositor;
+
+class ConvolveOperation : public NodeOperation {
+ public:
+  using NodeOperation::NodeOperation;
+
+  /* Convolves the image input with the kernel input that matches the kernel data type. If either
+   * the image or the kernel is a single value, the output shares the data of the image input. */
+  void execute() override
+  {
+    const Result &image = this->get_input("Image");
+    const Result &kernel = this->get_kernel_input();
+    Result &result = this->get_result("Image");
+
+    const bool can_convolve = !image.is_single_value() && !kernel.is_single_value();
+    if (can_convolve) {
+      convolve(this->context(), image, kernel, result, this->get_normalize_kernel());
+      return;
+    }
+
+    result.share_data(image);
+  }
+
+  /* Returns the kernel input that corresponds to the kernel data type. */
+  const Result &get_kernel_input()
+  {
+    const KernelDataType data_type = this->get_kernel_data_type();
+    if (data_type == KernelDataType::Float) {
+      return this->get_input("Float Kernel");
+    }
+    if (data_type == KernelDataType::Color) {
+      return this->get_input("Color Kernel");
+    }
+
+    BLI_assert_unreachable();
+    return this->get_input("Float Kernel");
+  }
+
+  /* Returns the value of the Kernel Data Type menu input, defaulting to a float kernel. */
+  KernelDataType get_kernel_data_type()
+  {
+    const MenuValue fallback = MenuValue(KernelDataType::Float);
+    const Result &menu_input = this->get_input("Kernel Data Type");
+    const MenuValue selected = menu_input.get_single_value_default(fallback);
+    return static_cast<KernelDataType>(selected.value);
+  }
+
+  /* Returns the value of the Normalize Kernel input, defaulting to true. */
+  bool get_normalize_kernel()
+  {
+    const Result &normalize_input = this->get_input("Normalize Kernel");
+    return normalize_input.get_single_value_default(true);
+  }
+};
+
+/* Creates the compositor operation that executes the given Convolve node. */
+static NodeOperation *get_compositor_operation(Context &context, DNode node)
+{
+  NodeOperation *operation = new ConvolveOperation(context, node);
+  return operation;
+}
+
+/* Sets up the Convolve node type and registers it. */
+static void node_register()
+{
+  static blender::bke::bNodeType ntype;
+  cmp_node_type_base(&ntype, "CompositorNodeConvolve");
+
+  /* User interface information. */
+  ntype.nclass = NODE_CLASS_OP_FILTER;
+  ntype.ui_name = "Convolve";
+  ntype.ui_description = "Convolves an image with a kernel";
+
+  /* Callbacks. */
+  ntype.declare = node_declare;
+  ntype.get_compositor_operation = get_compositor_operation;
+
+  blender::bke::node_register_type(ntype);
+}
+NOD_REGISTER_NODE(node_register)
+
+}  // namespace blender::nodes::node_composite_convolve_cc