ImBuf: multi-thread IMB_byte_from_float / IMB_float_from_byte

Both were largely or completely single threaded. They are used in various places; testing their usage in the VSE compositor modifier branch (!139634) by applying a default "do nothing" compositor modifier on a 1080p image (on Ryzen 5950X) gives: 51.4ms -> 12.2ms

Details about IMB_byte_from_float:
- No longer allocate a full new float buffer; instead do all work in a small (32KB size, half of typical L1 cache) job-local buffer.
- Previous code was doing un-premultiply + OCIO + premultiply + un-premultiply again. That is pointless; just do un-premultiply once.

Details about IMB_float_from_byte / IMB_float_from_byte_ex:
- Remove incorrect code around "allocate float buffer outside of image buffer" since it was not actually true to begin with.
- Inside threaded part, do color space conversion and premultiply at once per-scanline, so that data stays in CPU caches more.

Pull Request: https://projects.blender.org/blender/blender/pulls/145716
2025-09-05 18:59:18 +02:00
parent 68e5851615
commit 44b7d7592d
1 changed files with 80 additions and 70 deletions
--- a/source/blender/imbuf/intern/conversion.cc
+++ b/source/blender/imbuf/intern/conversion.cc
@@ -7,6 +7,7 @@
 * \ingroup imbuf
 */

+#include "BLI_array.hh"
 #include "BLI_rect.h"
 #include "BLI_task.hh"

@@ -603,14 +604,16 @@ void IMB_buffer_byte_from_byte(uchar *rect_to,

 void IMB_byte_from_float(ImBuf *ibuf)
 {
-  /* verify we have a float buffer */
+  using namespace blender;
+
+  /* Nothing to do if there's no float buffer. */
  if (ibuf->float_buffer.data == nullptr) {
    return;
  }

-  /* create byte rect if it didn't exist yet */
+  /* Allocate byte buffer if needed. */
  if (ibuf->byte_buffer.data == nullptr) {
-    if (IMB_alloc_byte_pixels(ibuf, false) == 0) {
+    if (!IMB_alloc_byte_pixels(ibuf, false)) {
      return;
    }
  }
@@ -623,33 +626,49 @@ void IMB_byte_from_float(ImBuf *ibuf)
                                  IMB_colormanagement_role_colorspace_name_get(
                                      COLOR_ROLE_DEFAULT_BYTE) :
                                  ibuf->byte_buffer.colorspace->name().c_str();
-
-  float *buffer = static_cast<float *>(MEM_dupallocN(ibuf->float_buffer.data));
-
-  /* first make float buffer in byte space */
  const bool predivide = IMB_alpha_affects_rgb(ibuf);
-  IMB_colormanagement_transform_float(
-      buffer, ibuf->x, ibuf->y, ibuf->channels, from_colorspace, to_colorspace, predivide);
-
-  /* convert from float's premul alpha to byte's straight alpha */
-  if (IMB_alpha_affects_rgb(ibuf)) {
-    IMB_unpremultiply_rect_float(buffer, ibuf->channels, ibuf->x, ibuf->y);
+  ColormanageProcessor *processor = STREQ(from_colorspace, to_colorspace) ?
+                                        nullptr :
+                                        IMB_colormanagement_colorspace_processor_new(
+                                            from_colorspace, to_colorspace);
+  if (processor && IMB_colormanagement_processor_is_noop(processor)) {
+    IMB_colormanagement_processor_free(processor);
+    processor = nullptr;
  }

-  /* convert float to byte */
-  IMB_buffer_byte_from_float(ibuf->byte_buffer.data,
-                             buffer,
-                             ibuf->channels,
-                             ibuf->dither,
-                             IB_PROFILE_SRGB,
-                             IB_PROFILE_SRGB,
-                             false,
-                             ibuf->x,
-                             ibuf->y,
-                             ibuf->x,
-                             ibuf->x);
-
-  MEM_freeN(buffer);
+  /* At 4 floats per pixel, this is 32KB of data, and fits into typical CPU L1 cache. */
+  static constexpr int grain_size = 2048;
+  threading::parallel_for(
+      IndexRange(IMB_get_pixel_count(ibuf)), grain_size, [&](const IndexRange range) {
+        /* Copy chunk of source float pixels into a local buffer. */
+        Array<float, grain_size * 4> buffer(range.size() * ibuf->channels);
+        buffer.as_mutable_span().copy_from(
+            Span(ibuf->float_buffer.data + range.first() * ibuf->channels, buffer.size()));
+        /* Unpremultiply alpha if needed. */
+        if (predivide) {
+          IMB_unpremultiply_rect_float(buffer.data(), ibuf->channels, range.size(), 1);
+        }
+        /* Convert to byte color space if needed. */
+        if (processor) {
+          IMB_colormanagement_processor_apply(
+              processor, buffer.data(), range.size(), 1, ibuf->channels, false);
+        }
+        /* Convert to bytes. */
+        IMB_buffer_byte_from_float(ibuf->byte_buffer.data + range.first() * 4,
+                                   buffer.data(),
+                                   ibuf->channels,
+                                   ibuf->dither,
+                                   IB_PROFILE_SRGB,
+                                   IB_PROFILE_SRGB,
+                                   false,
+                                   range.size(),
+                                   1,
+                                   ibuf->x,
+                                   ibuf->x);
+      });
+  if (processor != nullptr) {
+    IMB_colormanagement_processor_free(processor);
+  }

  /* ensure user flag is reset */
  ibuf->userflags &= ~IB_RECT_INVALID;
@@ -657,6 +676,8 @@ void IMB_byte_from_float(ImBuf *ibuf)

 void IMB_float_from_byte_ex(ImBuf *dst, const ImBuf *src, const rcti *region_to_update)
 {
+  using namespace blender;
+
  BLI_assert_msg(dst->float_buffer.data != nullptr,
                 "Destination buffer should have a float buffer assigned.");
  BLI_assert_msg(src->byte_buffer.data != nullptr,
@@ -673,64 +694,53 @@ void IMB_float_from_byte_ex(ImBuf *dst, const ImBuf *src, const rcti *region_to_
  BLI_assert_msg(region_to_update->ymax <= dst->y,
                 "Region to update should be clipped to the given buffers.");

-  float *rect_float = dst->float_buffer.data;
-  rect_float += (region_to_update->xmin + region_to_update->ymin * dst->x) * 4;
-  uchar *rect = src->byte_buffer.data;
-  rect += (region_to_update->xmin + region_to_update->ymin * dst->x) * 4;
  const int region_width = BLI_rcti_size_x(region_to_update);
  const int region_height = BLI_rcti_size_y(region_to_update);
+  const bool premultiply_alpha = IMB_alpha_affects_rgb(src);

-  /* Convert byte buffer to float buffer without color or alpha conversion. */
-  IMB_buffer_float_from_byte(rect_float,
-                             rect,
-                             IB_PROFILE_SRGB,
-                             IB_PROFILE_SRGB,
-                             false,
-                             region_width,
-                             region_height,
-                             src->x,
-                             dst->x);
+  threading::parallel_for(
+      IndexRange(region_to_update->ymin, region_height), 64, [&](const IndexRange y_range) {
+        const uchar *src_ptr = src->byte_buffer.data;
+        src_ptr += (region_to_update->xmin + y_range.first() * dst->x) * 4;
+        float *dst_ptr = dst->float_buffer.data;
+        dst_ptr += (region_to_update->xmin + y_range.first() * dst->x) * 4;

-  /* Perform color space conversion from rect color space to linear. */
-  float *float_ptr = rect_float;
-  for (int i = 0; i < region_height; i++) {
-    IMB_colormanagement_colorspace_to_scene_linear(
-        float_ptr, region_width, 1, dst->channels, src->byte_buffer.colorspace, false);
-    float_ptr += 4 * dst->x;
-  }
+        /* Convert byte -> float without color or alpha conversions. */
+        IMB_buffer_float_from_byte(dst_ptr,
+                                   src_ptr,
+                                   IB_PROFILE_SRGB,
+                                   IB_PROFILE_SRGB,
+                                   false,
+                                   region_width,
+                                   y_range.size(),
+                                   src->x,
+                                   dst->x);

-  /* Perform alpha conversion. */
-  if (IMB_alpha_affects_rgb(src)) {
-    float_ptr = rect_float;
-    for (int i = 0; i < region_height; i++) {
-      IMB_premultiply_rect_float(float_ptr, dst->channels, region_width, 1);
-      float_ptr += 4 * dst->x;
-    }
-  }
+        /* Convert to scene linear color space, and premultiply alpha if needed. */
+        float *dst_ptr_line = dst_ptr;
+        for ([[maybe_unused]] const int64_t y : y_range) {
+          IMB_colormanagement_colorspace_to_scene_linear(
+              dst_ptr_line, region_width, 1, dst->channels, src->byte_buffer.colorspace, false);
+          if (premultiply_alpha) {
+            IMB_premultiply_rect_float(dst_ptr_line, dst->channels, region_width, 1);
+          }
+          dst_ptr_line += 4 * dst->x;
+        }
+      });
 }

 void IMB_float_from_byte(ImBuf *ibuf)
 {
-  /* verify if we byte and float buffers */
+  /* Nothing to do if there's no byte buffer. */
  if (ibuf->byte_buffer.data == nullptr) {
    return;
  }

-  /* allocate float buffer outside of image buffer,
-   * so work-in-progress color space conversion doesn't
-   * interfere with other parts of blender
-   */
-  float *rect_float = ibuf->float_buffer.data;
-  if (rect_float == nullptr) {
-    rect_float = MEM_calloc_arrayN<float>(4 * IMB_get_pixel_count(ibuf), "IMB_float_from_byte");
-
-    if (rect_float == nullptr) {
+  /* Allocate float buffer if needed. */
+  if (ibuf->float_buffer.data == nullptr) {
+    if (!IMB_alloc_float_pixels(ibuf, 4, false)) {
      return;
    }
-
-    ibuf->channels = 4;
-
-    IMB_assign_float_buffer(ibuf, rect_float, IB_TAKE_OWNERSHIP);
  }

  rcti region_to_update;