Realtime Compositor: Add static cached images

The Realtime compositor currently relies on the GPU cache in image IDs.
That cache only supports single layer images, so multi-layer images will
be acquired without a cache, introducing significant IO bottlenecks for
the GPU compositor.

This patch ignores the image GPU cache and stores the images in the
static cache manager of the compositor. Draw data was introduced to the
image ID for proper cache invalidation, like other IDs such as masks.

The downside is that the cache will no longer be shared between EEVEE
and the compositor. But realistically, images are not typically shared
between materials and compositors.

This is just a temporary solution until we have proper GPU storage
support for image buffers.

Pull Request: https://projects.blender.org/blender/blender/pulls/115511
This commit is contained in:
Omar Emara
2023-12-13 09:50:42 +01:00
committed by Omar Emara
parent a56d0c700c
commit 356480fabb
17 changed files with 519 additions and 193 deletions

View File

@@ -96,6 +96,8 @@
#include "DEG_depsgraph.hh"
#include "DEG_depsgraph_query.hh"
#include "DRW_engine.h"
#include "BLO_read_write.hh"
/* for image user iteration */
@@ -180,6 +182,7 @@ static void image_copy_data(Main * /*bmain*/, ID *id_dst, const ID *id_src, cons
}
BLI_listbase_clear(&image_dst->anims);
BLI_listbase_clear(reinterpret_cast<ListBase *>(&image_dst->drawdata));
BLI_duplicatelist(&image_dst->tiles, &image_src->tiles);
@@ -223,6 +226,7 @@ static void image_free_data(ID *id)
BKE_previewimg_free(&image->preview);
BLI_freelistN(&image->tiles);
DRW_drawdata_free(id);
image_runtime_free_data(image);
}

View File

@@ -84,6 +84,7 @@ set(SRC
algorithms/COM_algorithm_transform.hh
cached_resources/intern/bokeh_kernel.cc
cached_resources/intern/cached_image.cc
cached_resources/intern/cached_mask.cc
cached_resources/intern/cached_shader.cc
cached_resources/intern/cached_texture.cc
@@ -96,6 +97,7 @@ set(SRC
cached_resources/intern/symmetric_separable_blur_weights.cc
cached_resources/COM_bokeh_kernel.hh
cached_resources/COM_cached_image.hh
cached_resources/COM_cached_mask.hh
cached_resources/COM_cached_resource.hh
cached_resources/COM_cached_shader.hh
@@ -175,6 +177,7 @@ set(GLSL_SRC
shaders/compositor_parallel_reduction.glsl
shaders/compositor_plane_deform.glsl
shaders/compositor_plane_deform_motion_blur.glsl
shaders/compositor_premultiply_alpha.glsl
shaders/compositor_projector_lens_distortion.glsl
shaders/compositor_read_input.glsl
shaders/compositor_realize_on_domain.glsl
@@ -292,6 +295,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_parallel_reduction_info.hh
shaders/infos/compositor_plane_deform_info.hh
shaders/infos/compositor_plane_deform_motion_blur_info.hh
shaders/infos/compositor_premultiply_alpha_info.hh
shaders/infos/compositor_projector_lens_distortion_info.hh
shaders/infos/compositor_read_input_info.hh
shaders/infos/compositor_realize_on_domain_info.hh

View File

@@ -300,6 +300,12 @@ class Result {
/* Returns the type of the result. */
ResultType type() const;
/* Returns the precision of the result. */
ResultPrecision precision() const;
/* Sets the precision of the result. */
void set_precision(ResultPrecision precision);
/* Returns true if the result is a texture and false if it is a single value. */
bool is_texture() const;

View File

@@ -5,6 +5,7 @@
#pragma once
#include "COM_bokeh_kernel.hh"
#include "COM_cached_image.hh"
#include "COM_cached_mask.hh"
#include "COM_cached_shader.hh"
#include "COM_cached_texture.hh"
@@ -53,6 +54,7 @@ class StaticCacheManager {
KeyingScreenContainer keying_screens;
CachedShaderContainer cached_shaders;
BokehKernelContainer bokeh_kernels;
CachedImageContainer cached_images;
/* Reset the cache manager by deleting the cached resources that are no longer needed because
* they weren't used in the last evaluation and prepare the remaining cached resources to track

View File

@@ -0,0 +1,77 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include <cstdint>
#include <memory>
#include <string>
#include "BLI_map.hh"
#include "GPU_texture.h"
#include "DNA_image_types.h"
#include "COM_cached_resource.hh"
namespace blender::realtime_compositor {
class Context;
/* ------------------------------------------------------------------------------------------------
* Cached Image Key.
*/
/* Identifies a cached image by the members of its image user that affect which image buffer gets
 * loaded—the frame number, layer, and view—together with the name of the pass. Used as the key in
 * CachedImageContainer's per-ID map. */
class CachedImageKey {
 public:
  ImageUser image_user;
  std::string pass_name;

  CachedImageKey(ImageUser image_user, std::string pass_name);

  /* Hashes the frame number, layer, view, and pass name; consistent with operator== below. */
  uint64_t hash() const;
};

/* Keys are equal when their frame number, layer, view, and pass name all match. */
bool operator==(const CachedImageKey &a, const CachedImageKey &b);
/* -------------------------------------------------------------------------------------------------
* Cached Image.
*
* A cached resource that computes and caches a GPU texture containing the contents of the image
* with the given image user. */
class CachedImage : public CachedResource {
 private:
  /* The cached GPU texture. Remains nullptr if the constructor failed to acquire an image
   * buffer for the given image and image user. */
  GPUTexture *texture_ = nullptr;

 public:
  /* Loads the image buffer identified by the given image user and pass name, uploads it to a GPU
   * texture, and preprocesses the texture into a format and alpha mode expected by the
   * compositor. */
  CachedImage(Context &context, Image *image, ImageUser *image_user, const char *pass_name);

  ~CachedImage();

  /* Returns the cached GPU texture. May be nullptr if construction failed—callers are expected
   * to handle that case. */
  GPUTexture *texture();
};
/* ------------------------------------------------------------------------------------------------
* Cached Image Container.
*/
class CachedImageContainer : CachedResourceContainer {
 private:
  /* Maps an image ID name to a map of that image's cached textures keyed by image user and pass
   * name. The outer keying by ID name allows invalidating all cached textures of a single image
   * when that image changes. */
  Map<std::string, Map<CachedImageKey, std::unique_ptr<CachedImage>>> map_;

 public:
  /* Frees cached images that were not used in the last evaluation and readies the rest to track
   * their needed status for the next one. See CachedResourceContainer::reset. */
  void reset() override;

  /* Check if the given image ID has changed since the last time it was retrieved through its
   * recalculate flag, and if so, invalidate its corresponding cached image and reset the
   * recalculate flag to ready it to track the next change. Then, check if there is an available
   * CachedImage cached resource with the given image user and pass_name in the container, if one
   * exists, return it, otherwise, return a newly created one and add it to the container. In both
   * cases, tag the cached resource as needed to keep it cached for the next evaluation. */
  GPUTexture *get(Context &context,
                  Image *image,
                  const ImageUser *image_user,
                  const char *pass_name);
};
} // namespace blender::realtime_compositor

View File

@@ -0,0 +1,321 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include <cstdint>
#include <memory>
#include <utility>

#include "BLI_array.hh"
#include "BLI_assert.h"
#include "BLI_hash.hh"
#include "BLI_listbase.h"

#include "RE_pipeline.h"

#include "GPU_shader.h"
#include "GPU_texture.h"

#include "IMB_imbuf.h"
#include "IMB_imbuf_types.h"

#include "BKE_image.h"
#include "BKE_lib_id.h"

#include "DNA_ID.h"
#include "DNA_image_types.h"

#include "COM_cached_image.hh"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
namespace blender::realtime_compositor {
/* --------------------------------------------------------------------
* Cached Image Key.
*/
/* Construct a key from the given image user and pass name. The pass name is taken by value and
 * moved into the member to avoid an extra string copy. */
CachedImageKey::CachedImageKey(ImageUser image_user, std::string pass_name)
    : image_user(image_user), pass_name(std::move(pass_name))
{
}
/* Hash exactly the members compared in operator== below: the frame number, layer, and view of
 * the image user, combined with the pass name. */
uint64_t CachedImageKey::hash() const
{
  return get_default_hash_4(image_user.framenr, image_user.layer, image_user.view, pass_name);
}
/* Keys match only when all of the frame number, layer, view, and pass name agree. */
bool operator==(const CachedImageKey &a, const CachedImageKey &b)
{
  if (a.image_user.framenr != b.image_user.framenr) {
    return false;
  }
  if (a.image_user.layer != b.image_user.layer) {
    return false;
  }
  if (a.image_user.view != b.image_user.view) {
    return false;
  }
  return a.pass_name == b.pass_name;
}
/* --------------------------------------------------------------------
* Cached Image.
*/
/* Returns a new texture of the given format and precision preprocessed using the given shader. The
 * shader is expected to read from a sampler named "input_tx" and write to an image named
 * "output_img". The input texture is freed—the caller hands over its ownership. */
static GPUTexture *preprocess_texture(Context &context,
                                      GPUTexture *input_texture,
                                      eGPUTextureFormat target_format,
                                      ResultPrecision precision,
                                      const char *shader_name)
{
  const int2 size = int2(GPU_texture_width(input_texture), GPU_texture_height(input_texture));

  /* The output texture matches the input's size but uses the compositor-compatible format. */
  GPUTexture *preprocessed_texture = GPU_texture_create_2d(
      "Cached Image", size.x, size.y, 1, target_format, GPU_TEXTURE_USAGE_GENERAL, nullptr);

  GPUShader *shader = context.get_shader(shader_name, precision);
  GPU_shader_bind(shader);

  const int input_unit = GPU_shader_get_sampler_binding(shader, "input_tx");
  GPU_texture_bind(input_texture, input_unit);

  const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
  GPU_texture_image_bind(preprocessed_texture, image_unit);

  /* Dispatch enough compute threads to cover every pixel. */
  compute_dispatch_threads_at_least(shader, size);

  GPU_shader_unbind();
  GPU_texture_unbind(input_texture);
  GPU_texture_image_unbind(preprocessed_texture);

  /* The input texture is consumed; only the preprocessed texture survives. */
  GPU_texture_free(input_texture);

  return preprocessed_texture;
}
/* Compositor images are expected to be always pre-multiplied, so identify if the GPU texture
 * returned by the IMB module is straight and needs to be pre-multiplied. An exception is when the
 * image has an alpha mode of channel packed or alpha ignore, in which case pre-multiplication is
 * always skipped. */
static bool should_premultiply_alpha(Image *image, ImBuf *image_buffer)
{
  const bool alpha_is_opaque_or_packed = image->alpha_mode == IMA_ALPHA_CHANNEL_PACKED ||
                                         image->alpha_mode == IMA_ALPHA_IGNORE;
  if (alpha_is_opaque_or_packed) {
    return false;
  }

  const bool already_premultiplied = BKE_image_has_gpu_texture_premultiplied_alpha(image,
                                                                                   image_buffer);
  return !already_premultiplied;
}
/* Get a suitable texture format supported by the compositor given the format of the texture
 * returned by the IMB module. See imb_gpu_get_format for the formats that need to be handled.
 * Float formats pass through unchanged; 8-bit formats are widened to their 16-bit float
 * equivalents since the compositor operates on float textures. */
static eGPUTextureFormat get_compatible_texture_format(eGPUTextureFormat original_format)
{
  switch (original_format) {
    case GPU_R16F:
    case GPU_R32F:
    case GPU_RGBA16F:
    case GPU_RGBA32F:
      /* Already a float format supported by the compositor. */
      return original_format;
    case GPU_R8:
      return GPU_R16F;
    case GPU_RGBA8:
    case GPU_SRGB8_A8:
      return GPU_RGBA16F;
    default:
      break;
  }

  /* Any other format indicates a new IMB format that this function was not updated for. */
  BLI_assert_unreachable();
  return original_format;
}
/* Get the render layer selected in the image user, assuming the image is a multilayer image—the
 * caller must guarantee image->rr is initialized. Returns nullptr if the layer index is out of
 * range. */
static RenderLayer *get_render_layer(Image *image, ImageUser &image_user)
{
  const ListBase *layers = &image->rr->layers;
  return static_cast<RenderLayer *>(BLI_findlink(layers, image_user.layer));
}
/* Get the index of the pass with the given name in the selected render layer's passes list,
 * assuming the image is a multilayer image. Returns -1 if no pass with that name exists. */
static int get_pass_index(Image *image, ImageUser &image_user, const char *name)
{
  const RenderLayer *render_layer = get_render_layer(image, image_user);
  return BLI_findstringindex(&render_layer->passes, name, offsetof(RenderPass, name));
}
/* Get the index of the view selected in the image user. If the image is not a multi-view image
 * or only has a single view, then zero is returned. Otherwise, if the image is a multi-view
 * image, the index of the selected view is returned. However, note that the value of the view
 * member of the image user is not the actual index of the view. More specifically, the index 0
 * is reserved to denote the special mode of operation "All", which dynamically selects the view
 * whose name matches the view currently being rendered. It follows that the views are then
 * indexed starting from 1. So for non zero view values, the actual index of the view is the
 * value of the view member of the image user minus 1. */
static int get_view_index(Context &context, Image *image, ImageUser &image_user)
{
  /* Not a multi-view image, so the only sensible index is zero. */
  if (!BKE_image_is_multiview(image)) {
    return 0;
  }

  /* A single view also maps to index zero. */
  const ListBase *views = &image->rr->views;
  if (BLI_listbase_count_at_most(views, 2) < 2) {
    return 0;
  }

  /* A non-zero view is an explicit selection, stored offset by one as described above. */
  const int selected_view = image_user.view;
  if (selected_view != 0) {
    return selected_view - 1;
  }

  /* View zero denotes "All": look up the view whose name matches the view currently being
   * rendered, falling back to the first view when no name matches. */
  const char *current_view_name = context.get_view_name().data();
  const int matched_index = BLI_findstringindex(
      views, current_view_name, offsetof(RenderView, name));
  return matched_index == -1 ? 0 : matched_index;
}
/* Get a copy of the image user that is appropriate to retrieve the needed image buffer from the
 * image. This essentially sets the appropriate frame, pass, and view that corresponds to the
 * given context and pass name. The original image user is not modified. */
static ImageUser compute_image_user_for_pass(Context &context,
                                             Image *image,
                                             const ImageUser *image_user,
                                             const char *pass_name)
{
  ImageUser image_user_for_pass = *image_user;

  /* Set the needed view. */
  image_user_for_pass.view = get_view_index(context, image, image_user_for_pass);

  /* Set the needed pass. For multilayer images the pass index must be resolved from the pass
   * name; otherwise only the multi-view index needs updating. */
  if (BKE_image_is_multilayer(image)) {
    image_user_for_pass.pass = get_pass_index(image, image_user_for_pass, pass_name);
    BKE_image_multilayer_index(image->rr, &image_user_for_pass);
  }
  else {
    BKE_image_multiview_index(image, &image_user_for_pass);
  }

  return image_user_for_pass;
}
/* Load the image buffer for the given image user and pass name, upload it as a GPU texture, and
 * preprocess the texture into a compositor-compatible format and alpha mode. On failure to
 * acquire an image buffer, texture_ is left as nullptr. */
CachedImage::CachedImage(Context &context,
                         Image *image,
                         ImageUser *image_user,
                         const char *pass_name)
{
  /* We can't retrieve the needed image buffer yet, because we still need to assign the pass index
   * to the image user in order to acquire the image buffer corresponding to the given pass name.
   * However, in order to compute the pass index, we need the render result structure of the image
   * to be initialized. So we first acquire a dummy image buffer since it initializes the image
   * render result as a side effect. We also use that as a means of validation, since we can early
   * exit if the returned image buffer is nullptr. This image buffer can be immediately released
   * since it carries no important information. */
  ImBuf *initial_image_buffer = BKE_image_acquire_ibuf(image, image_user, nullptr);
  BKE_image_release_ibuf(image, initial_image_buffer, nullptr);
  if (!initial_image_buffer) {
    return;
  }

  /* Now that the render result is initialized, resolve the frame, pass, and view and acquire the
   * actual image buffer for the pass. */
  ImageUser image_user_for_pass = compute_image_user_for_pass(
      context, image, image_user, pass_name);

  ImBuf *image_buffer = BKE_image_acquire_ibuf(image, &image_user_for_pass, nullptr);
  const bool is_premultiplied = BKE_image_has_gpu_texture_premultiplied_alpha(image, image_buffer);
  texture_ = IMB_create_gpu_texture("Image Texture", image_buffer, true, is_premultiplied);

  const eGPUTextureFormat original_format = GPU_texture_format(texture_);
  const eGPUTextureFormat target_format = get_compatible_texture_format(original_format);
  const ResultType result_type = Result::type(target_format);
  const ResultPrecision precision = Result::precision(target_format);

  /* The GPU image returned by the IMB module can be in a format not supported by the compositor,
   * or it might need premultiplication, so preprocess them first. Note that preprocess_texture
   * frees the old texture and returns a new one. */
  if (result_type == ResultType::Color && should_premultiply_alpha(image, image_buffer)) {
    texture_ = preprocess_texture(
        context, texture_, target_format, precision, "compositor_premultiply_alpha");
  }
  else if (original_format != target_format) {
    const char *conversion_shader_name = result_type == ResultType::Float ?
                                             "compositor_convert_float_to_float" :
                                             "compositor_convert_color_to_color";
    texture_ = preprocess_texture(
        context, texture_, target_format, precision, conversion_shader_name);
  }

  /* Set the alpha to 1 using swizzling if alpha is ignored. */
  if (result_type == ResultType::Color && image->alpha_mode == IMA_ALPHA_IGNORE) {
    GPU_texture_swizzle_set(texture_, "rgb1");
  }

  BKE_image_release_ibuf(image, image_buffer, nullptr);
}
CachedImage::~CachedImage()
{
  /* The texture can be nullptr if the constructor failed to acquire an initial image buffer and
   * returned early, so guard against freeing a null texture. */
  if (texture_) {
    GPU_texture_free(texture_);
  }
}
/* Returns the cached GPU texture. May be nullptr if construction failed to acquire an image
 * buffer—see the constructor. */
GPUTexture *CachedImage::texture()
{
  return texture_;
}
/* --------------------------------------------------------------------
* Cached Image Container.
*/
void CachedImageContainer::reset()
{
  /* Delete the cached images that were not needed in the last evaluation, and mark the surviving
   * ones as unneeded so the next evaluation can re-tag exactly the ones it uses. */
  for (auto &images_of_id : map_.values()) {
    images_of_id.remove_if([](auto item) { return !item.value->needed; });
    for (auto &surviving_image : images_of_id.values()) {
      surviving_image->needed = false;
    }
  }

  /* Drop image IDs whose cached images were all deleted above. */
  map_.remove_if([](auto item) { return item.value.is_empty(); });
}
/* Return the cached texture for the given image, image user, and pass, creating and caching it if
 * necessary. Invalidates the image's cache entries first when the image ID was tagged for
 * recalculation. Returns nullptr for null inputs. See the header for the full contract. */
GPUTexture *CachedImageContainer::get(Context &context,
                                      Image *image,
                                      const ImageUser *image_user,
                                      const char *pass_name)
{
  if (!image || !image_user) {
    return nullptr;
  }

  /* Compute the effective frame number of the image if it was animated, since the frame number
   * is part of the cache key. */
  ImageUser image_user_for_frame = *image_user;
  BKE_image_user_frame_calc(image, &image_user_for_frame, context.get_frame_number());

  const CachedImageKey key(image_user_for_frame, pass_name);

  auto &cached_images_for_id = map_.lookup_or_add_default(image->id.name);

  /* Invalidate the cache for that image ID if it was changed and reset the recalculate flag.
   * Note that query_id_recalc_flag also clears the ID's recalculate flag as a side effect, so it
   * must only be queried once per evaluation for this ID. */
  if (context.query_id_recalc_flag(reinterpret_cast<ID *>(image)) & ID_RECALC_ALL) {
    cached_images_for_id.clear();
  }

  /* Look up an existing cached image for the key, or lazily construct, cache, and return a new
   * one. */
  auto &cached_image = *cached_images_for_id.lookup_or_add_cb(key, [&]() {
    return std::make_unique<CachedImage>(context, image, &image_user_for_frame, pass_name);
  });

  /* Tag the cached image as needed so it survives the next reset(). */
  cached_image.needed = true;

  return cached_image.texture();
}
} // namespace blender::realtime_compositor

View File

@@ -433,6 +433,18 @@ ResultType Result::type() const
return type_;
}
/* Returns the precision of the result. */
ResultPrecision Result::precision() const
{
  return precision_;
}
/* Sets the precision of the result. Must be called before the result is allocated, since the
 * precision determines the format of the allocated texture. */
void Result::set_precision(ResultPrecision precision)
{
  /* Changing the precision can only be done if it wasn't allocated yet. */
  BLI_assert(!is_allocated());

  precision_ = precision;
}
bool Result::is_texture() const
{
return !is_single_value_;

View File

@@ -19,6 +19,7 @@ void StaticCacheManager::reset()
keying_screens.reset();
cached_shaders.reset();
bokeh_kernels.reset();
cached_images.reset();
}
} // namespace blender::realtime_compositor

View File

@@ -0,0 +1,12 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
void main()
{
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  /* Multiply the color channels by alpha while leaving the alpha channel itself untouched. */
  vec4 color = texture_load(input_tx, texel);
  imageStore(output_img, texel, vec4(color.rgb * color.a, color.a));
}

View File

@@ -10,6 +10,12 @@ GPU_SHADER_CREATE_INFO(compositor_convert_shared)
.typedef_source("gpu_shader_compositor_type_conversion.glsl")
.compute_source("compositor_convert.glsl");
GPU_SHADER_CREATE_INFO(compositor_convert_float_to_float)
.additional_info("compositor_convert_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "value")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_convert_float_to_vector)
.additional_info("compositor_convert_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
@@ -34,14 +40,32 @@ GPU_SHADER_CREATE_INFO(compositor_convert_color_to_vector)
.define("CONVERT_EXPRESSION(value)", "vec4(vec3_from_vec4(value), 0.0)")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_convert_color_to_color)
.additional_info("compositor_convert_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "value")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_convert_vector_to_float)
.additional_info("compositor_convert_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4(float_from_vec3(value.xyz), vec3(0.0))")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_convert_vector_to_vector)
.additional_info("compositor_convert_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "value")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_convert_vector_to_color)
.additional_info("compositor_convert_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4_from_vec3(value.xyz)")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_convert_color_to_alpha)
.additional_info("compositor_convert_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("CONVERT_EXPRESSION(value)", "vec4(value.a)")
.do_static_compilation(true);

View File

@@ -0,0 +1,12 @@
/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
/* Compute shader that multiplies the color channels of the input texture by its alpha, writing
 * the pre-multiplied result to the output image. See compositor_premultiply_alpha.glsl. */
GPU_SHADER_CREATE_INFO(compositor_premultiply_alpha)
    .local_group_size(16, 16)
    .sampler(0, ImageType::FLOAT_2D, "input_tx")
    .image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
    .compute_source("compositor_premultiply_alpha.glsl")
    .do_static_compilation(true);

View File

@@ -24,10 +24,8 @@ GPU_SHADER_CREATE_INFO(compositor_read_input_vector)
GPU_SHADER_CREATE_INFO(compositor_read_input_color)
.additional_info("compositor_read_input_shared")
.push_constant(Type::BOOL, "premultiply_alpha")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.define("READ_EXPRESSION(input_color)",
"input_color * vec4(vec3(premultiply_alpha ? input_color.a : 1.0), 1.0)")
.define("READ_EXPRESSION(input_color)", "input_color")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_read_input_alpha)

View File

@@ -842,6 +842,7 @@ static bool id_type_can_have_drawdata(const short id_type)
case ID_TE:
case ID_MSK:
case ID_MC:
case ID_IM:
return true;
/* no DrawData */

View File

@@ -136,6 +136,12 @@ typedef struct Image_Runtime {
typedef struct Image {
ID id;
struct AnimData *adt;
/**
* Engines draw data, must be immediately after AnimData. See IdDdtTemplate and
* DRW_drawdatalist_from_id to understand this requirement.
*/
DrawDataList drawdata;
/** File path, 1024 = FILE_MAX. */
char filepath[1024];

View File

@@ -445,88 +445,45 @@ class ImageOperation : public NodeOperation {
void execute() override
{
if (!is_valid()) {
allocate_invalid();
return;
}
update_image_frame_number();
for (const bNodeSocket *output : this->node()->output_sockets()) {
compute_output(output->identifier);
}
}
/* Returns true if the node results can be computed, otherwise, returns false. */
bool is_valid()
{
Image *image = get_image();
ImageUser *image_user = get_image_user();
if (!image || !image_user) {
return false;
}
if (BKE_image_is_multilayer(image)) {
if (!image->rr) {
return false;
}
RenderLayer *render_layer = get_render_layer();
if (!render_layer) {
return false;
}
}
return true;
}
/* Allocate all needed outputs as invalid. This should be called when is_valid returns false. */
void allocate_invalid()
{
for (const bNodeSocket *output : this->node()->output_sockets()) {
if (!should_compute_output(output->identifier)) {
continue;
}
Result &result = get_result(output->identifier);
result.allocate_invalid();
}
}
/* Compute the effective frame number of the image if it was animated and invalidate the cached
* GPU texture if the computed frame number is different. */
void update_image_frame_number()
{
BKE_image_user_frame_calc(get_image(), get_image_user(), context().get_frame_number());
}
void compute_output(StringRef identifier)
{
if (!should_compute_output(identifier)) {
return;
}
ImageUser image_user = compute_image_user_for_output(identifier);
BKE_image_ensure_gpu_texture(get_image(), &image_user);
GPUTexture *image_texture = BKE_image_get_gpu_texture(get_image(), &image_user, nullptr);
GPUTexture *image_texture = context().cache_manager().cached_images.get(
context(), get_image(), get_image_user(), get_pass_name(identifier));
const int2 size = int2(GPU_texture_width(image_texture), GPU_texture_height(image_texture));
Result &result = get_result(identifier);
result.allocate_texture(Domain(size));
GPUShader *shader = context().get_shader(get_shader_name(identifier));
GPU_shader_bind(shader);
const int2 lower_bound = int2(0);
GPU_shader_uniform_2iv(shader, "lower_bound", lower_bound);
if (result.type() == ResultType::Color) {
GPU_shader_uniform_1b(shader, "premultiply_alpha", should_premultiply_alpha(image_user));
if (!image_texture) {
result.allocate_invalid();
return;
}
const ResultPrecision precision = Result::precision(GPU_texture_format(image_texture));
/* Alpha is not an actual pass, but one that is extracted from the combined pass. So we need to
* extract it using a shader. */
if (identifier != "Alpha") {
result.set_precision(precision);
result.wrap_external(image_texture);
return;
}
GPUShader *shader = context().get_shader("compositor_convert_color_to_alpha", precision);
GPU_shader_bind(shader);
const int input_unit = GPU_shader_get_sampler_binding(shader, "input_tx");
GPU_texture_bind(image_texture, input_unit);
const int2 size = int2(GPU_texture_width(image_texture), GPU_texture_height(image_texture));
result.allocate_texture(Domain(size));
result.bind_as_image(shader, "output_img");
compute_dispatch_threads_at_least(shader, size);
@@ -536,139 +493,21 @@ class ImageOperation : public NodeOperation {
result.unbind_as_image();
}
/* Get a copy of the image user that is appropriate to retrieve the image buffer for the output
* with the given identifier. This essentially sets the appropriate pass and view indices that
* corresponds to the output. */
ImageUser compute_image_user_for_output(StringRef identifier)
{
ImageUser image_user = *get_image_user();
/* Set the needed view. */
image_user.view = get_view_index();
/* Set the needed pass. */
if (BKE_image_is_multilayer(get_image())) {
image_user.pass = get_pass_index(get_pass_name(identifier));
BKE_image_multilayer_index(get_image()->rr, &image_user);
}
else {
BKE_image_multiview_index(get_image(), &image_user);
}
return image_user;
}
/* Get the shader that should be used to compute the output with the given identifier. The
* shaders just copy the retrieved image textures into the results except for the alpha output,
* which extracts the alpha and writes it to the result instead. Note that a call to a host
* texture copy doesn't work because results are stored in a different half float formats. */
const char *get_shader_name(StringRef identifier)
{
if (identifier == "Alpha") {
return "compositor_read_input_alpha";
}
else if (get_result(identifier).type() == ResultType::Color) {
return "compositor_read_input_color";
}
else {
return "compositor_read_input_float";
}
}
/* Compositor image inputs are expected to be always pre-multiplied, so identify if the GPU
* texture returned by the image module is straight and needs to be pre-multiplied. An exception
* is when the image has an alpha mode of channel packed or alpha ignore, in which case, we
* always ignore pre-multiplication. */
bool should_premultiply_alpha(ImageUser &image_user)
{
Image *image = get_image();
if (ELEM(image->alpha_mode, IMA_ALPHA_CHANNEL_PACKED, IMA_ALPHA_IGNORE)) {
return false;
}
ImBuf *image_buffer = BKE_image_acquire_ibuf(image, &image_user, nullptr);
if (!image_buffer) {
return false;
}
const bool has_premultiplied_alpha = BKE_image_has_gpu_texture_premultiplied_alpha(
image, image_buffer);
BKE_image_release_ibuf(image, image_buffer, nullptr);
return !has_premultiplied_alpha;
}
Image *get_image()
{
return (Image *)bnode().id;
}
ImageUser *get_image_user()
{
return static_cast<ImageUser *>(bnode().storage);
}
/* Get the render layer selected in the node assuming the image is a multilayer image. */
RenderLayer *get_render_layer()
{
const ListBase *layers = &get_image()->rr->layers;
return static_cast<RenderLayer *>(BLI_findlink(layers, get_image_user()->layer));
}
/* Get the name of the pass corresponding to the output with the given identifier assuming the
* image is a multilayer image. */
/* Get the name of the pass corresponding to the output with the given identifier. */
const char *get_pass_name(StringRef identifier)
{
DOutputSocket output = node().output_by_identifier(identifier);
return static_cast<NodeImageLayer *>(output->storage)->pass_name;
}
/* Get the index of the pass with the given name in the selected render layer's passes list
* assuming the image is a multilayer image. */
int get_pass_index(const char *name)
Image *get_image()
{
return BLI_findstringindex(&get_render_layer()->passes, name, offsetof(RenderPass, name));
return reinterpret_cast<Image *>(bnode().id);
}
/* Get the index of the view selected in the node. If the image is not a multi-view image or only
* has a single view, then zero is returned. Otherwise, if the image is a multi-view image, the
* index of the selected view is returned. However, note that the value of the view member of the
* image user is not the actual index of the view. More specifically, the index 0 is reserved to
* denote the special mode of operation "All", which dynamically selects the view whose name
* matches the view currently being rendered. It follows that the views are then indexed starting
* from 1. So for non zero view values, the actual index of the view is the value of the view
* member of the image user minus 1. */
int get_view_index()
ImageUser *get_image_user()
{
/* The image is not a multi-view image, so just return zero. */
if (!BKE_image_is_multiview(get_image())) {
return 0;
}
const ListBase *views = &get_image()->rr->views;
/* There is only one view and its index is 0. */
if (BLI_listbase_count_at_most(views, 2) < 2) {
return 0;
}
const int view = get_image_user()->view;
/* The view is not zero, which means it is manually specified and the actual index is then the
* view value minus 1. */
if (view != 0) {
return view - 1;
}
/* Otherwise, the view value is zero, denoting the special mode of operation "All", which finds
* the index of the view whose name matches the view currently being rendered. */
const char *view_name = context().get_view_name().data();
const int matched_view = BLI_findstringindex(views, view_name, offsetof(RenderView, name));
/* No view matches the view currently being rendered, so fallback to the first view. */
if (matched_view == -1) {
return 0;
}
return matched_view;
return static_cast<ImageUser *>(bnode().storage);
}
};

View File

@@ -11,6 +11,7 @@ set(INC
../compositor/realtime_compositor
../compositor/realtime_compositor/cached_resources
../draw
../draw/intern
../gpu
../gpu/intern
../imbuf

View File

@@ -10,12 +10,15 @@
#include "MEM_guardedalloc.h"
#include "DNA_ID.h"
#include "BKE_global.h"
#include "BKE_image.h"
#include "BKE_node.hh"
#include "BKE_scene.h"
#include "DRW_engine.h"
#include "DRW_render.h"
#include "IMB_colormanagement.h"
#include "IMB_imbuf.h"
@@ -334,10 +337,13 @@ class Context : public realtime_compositor::Context {
* incomplete support, and leave more specific message to individual nodes? */
}
IDRecalcFlag query_id_recalc_flag(ID * /*id*/) const override
IDRecalcFlag query_id_recalc_flag(ID *id) const override
{
/* TODO: implement? */
return IDRecalcFlag(0);
DrawEngineType *owner = (DrawEngineType *)this;
DrawData *draw_data = DRW_drawdata_ensure(id, owner, sizeof(DrawData), nullptr, nullptr);
IDRecalcFlag recalc_flag = IDRecalcFlag(draw_data->recalc);
draw_data->recalc = IDRecalcFlag(0);
return recalc_flag;
}
void output_to_render_result()