VSE: Faster waveform & vectorscope calculation

Waveform, Parade and Vectorscopes were calculated by copying the rendered image, transforming it into display space, and calculating the the scope from that. On large resolutions, this copy+transform+free of the image was taking up majority of the time. Especially for default case when the display transform is a no-op. Change the code so that display transform, if needed, is done directly inside scope calculation, without needing a full-size temporary image. Additionally, the vectorscope calculation was single threaded. Multi-thread it by doing a parallel reduction, where each job calculates their own scope image, and they are merged. Since job payload is fairly large (512x512 bytes), make jobs pretty large (256k pixels each). Time (in ms) taken to calculate scope at 4K resolution (Ryzen 5950X, Windows). Default color management settings: - Waveform, PNG/SDR: 5.5 -> 5.2 - Waveform, EXR/HDR: 33.5 -> 10.3 - Vectorscope, PNG/SDR: 32.4 -> 4.5 - Vectorscope, EXR/HDR: 53.2 -> 9.8 Timings when additional color space management is needed (display space set to Display P3, sequencer kept at sRGB): - Waveform, PNG/SDR: 29.5 -> 10.9 - Waveform, EXR/HDR: 67.6 -> 10.9 - Vectorscope, PNG/SDR: 56.8 -> 12.0 - Vectorscope, EXR/HDR: 85.9 -> 13.4 This also fixes calculation of waveform / vectorscope on float (HDR) images that have alpha channel; the scope was wrongly calculated on premultiplied color values, which was not consistent with how it was calculated on the byte images. Pull Request: https://projects.blender.org/blender/blender/pulls/144059
2025-08-06 16:24:05 +02:00
parent ef6f74f658
commit 8222cb0cfd
6 changed files with 295 additions and 142 deletions
--- a/source/blender/blenlib/BLI_math_color.h
+++ b/source/blender/blenlib/BLI_math_color.h
@@ -137,10 +137,10 @@ void rgb_float_set_hue_float_offset(float rgb[3], float hue_offset);
 */
 void rgb_byte_set_hue_float_offset(unsigned char rgb[3], float hue_offset);

-void rgb_uchar_to_float(float r_col[3], const unsigned char col_ub[3]);
-void rgba_uchar_to_float(float r_col[4], const unsigned char col_ub[4]);
-void rgb_float_to_uchar(unsigned char r_col[3], const float col_f[3]);
-void rgba_float_to_uchar(unsigned char r_col[4], const float col_f[4]);
+MINLINE void rgb_uchar_to_float(float r_col[3], const unsigned char col_ub[3]);
+MINLINE void rgba_uchar_to_float(float r_col[4], const unsigned char col_ub[4]);
+MINLINE void rgb_float_to_uchar(unsigned char r_col[3], const float col_f[3]);
+MINLINE void rgba_float_to_uchar(unsigned char r_col[4], const float col_f[4]);

 /**
 * Compute luminance using Rec.709 primaries, for sRGB and linear Rec.709.
--- a/source/blender/blenlib/intern/math_color.cc
+++ b/source/blender/blenlib/intern/math_color.cc
@@ -393,31 +393,6 @@ void cpack_to_rgb(uint col, float *r_r, float *r_g, float *r_b)
  *r_b = float((col >> 16) & 0xFF) * (1.0f / 255.0f);
 }

-void rgb_uchar_to_float(float r_col[3], const uchar col_ub[3])
-{
-  r_col[0] = float(col_ub[0]) * (1.0f / 255.0f);
-  r_col[1] = float(col_ub[1]) * (1.0f / 255.0f);
-  r_col[2] = float(col_ub[2]) * (1.0f / 255.0f);
-}
-
-void rgba_uchar_to_float(float r_col[4], const uchar col_ub[4])
-{
-  r_col[0] = float(col_ub[0]) * (1.0f / 255.0f);
-  r_col[1] = float(col_ub[1]) * (1.0f / 255.0f);
-  r_col[2] = float(col_ub[2]) * (1.0f / 255.0f);
-  r_col[3] = float(col_ub[3]) * (1.0f / 255.0f);
-}
-
-void rgb_float_to_uchar(uchar r_col[3], const float col_f[3])
-{
-  unit_float_to_uchar_clamp_v3(r_col, col_f);
-}
-
-void rgba_float_to_uchar(uchar r_col[4], const float col_f[4])
-{
-  unit_float_to_uchar_clamp_v4(r_col, col_f);
-}
-
 /* ********************************* color transforms ********************************* */

 float srgb_to_linearrgb(float c)
--- a/source/blender/blenlib/intern/math_color_inline.cc
+++ b/source/blender/blenlib/intern/math_color_inline.cc
@@ -145,6 +145,31 @@ MINLINE void srgb_to_linearrgb_uchar4_predivide(float linear[4], const unsigned
  srgb_to_linearrgb_predivide_v4(linear, fsrgb);
 }

+MINLINE void rgb_uchar_to_float(float r_col[3], const uchar col_ub[3])
+{
+  r_col[0] = float(col_ub[0]) * (1.0f / 255.0f);
+  r_col[1] = float(col_ub[1]) * (1.0f / 255.0f);
+  r_col[2] = float(col_ub[2]) * (1.0f / 255.0f);
+}
+
+MINLINE void rgba_uchar_to_float(float r_col[4], const uchar col_ub[4])
+{
+  r_col[0] = float(col_ub[0]) * (1.0f / 255.0f);
+  r_col[1] = float(col_ub[1]) * (1.0f / 255.0f);
+  r_col[2] = float(col_ub[2]) * (1.0f / 255.0f);
+  r_col[3] = float(col_ub[3]) * (1.0f / 255.0f);
+}
+
+MINLINE void rgb_float_to_uchar(uchar r_col[3], const float col_f[3])
+{
+  unit_float_to_uchar_clamp_v3(r_col, col_f);
+}
+
+MINLINE void rgba_float_to_uchar(uchar r_col[4], const float col_f[4])
+{
+  unit_float_to_uchar_clamp_v4(r_col, col_f);
+}
+
 MINLINE void rgba_uchar_args_set(
    uchar col[4], const uchar r, const uchar g, const uchar b, const uchar a)
 {
--- a/source/blender/editors/space_sequencer/sequencer_preview_draw.cc
+++ b/source/blender/editors/space_sequencer/sequencer_preview_draw.cc
@@ -171,23 +171,6 @@ ImBuf *sequencer_ibuf_get(const bContext *C, const int timeline_frame, const cha
  return ibuf;
 }

-static ImBuf *sequencer_make_scope(const ColorManagedViewSettings &view_settings,
-                                   const ColorManagedDisplaySettings &display_settings,
-                                   const ImBuf &ibuf,
-                                   ImBuf *(*make_scope_fn)(const ImBuf *ibuf))
-{
-  ImBuf *display_ibuf = IMB_dupImBuf(&ibuf);
-  ImBuf *scope;
-
-  IMB_colormanagement_imbuf_make_display_space(display_ibuf, &view_settings, &display_settings);
-
-  scope = make_scope_fn(display_ibuf);
-
-  IMB_freeImBuf(display_ibuf);
-
-  return scope;
-}
-
 static void sequencer_display_size(const RenderData &render_data, float r_viewrect[2])
 {
  r_viewrect[0] = float(render_data.xsch);
@@ -821,14 +804,14 @@ static bool sequencer_calc_scopes(const SpaceSeq &space_sequencer,
      break;
    case SEQ_DRAW_IMG_WAVEFORM:
      if (!scopes->waveform_ibuf) {
-        scopes->waveform_ibuf = sequencer_make_scope(
-            view_settings, display_settings, ibuf, make_waveform_view_from_ibuf);
+        scopes->waveform_ibuf = make_waveform_view_from_ibuf(
+            &ibuf, view_settings, display_settings);
      }
      break;
    case SEQ_DRAW_IMG_VECTORSCOPE:
      if (!scopes->vector_ibuf) {
-        scopes->vector_ibuf = sequencer_make_scope(
-            view_settings, display_settings, ibuf, make_vectorscope_view_from_ibuf);
+        scopes->vector_ibuf = make_vectorscope_view_from_ibuf(
+            &ibuf, view_settings, display_settings);
      }
      break;
    case SEQ_DRAW_IMG_HISTOGRAM: {
@@ -836,8 +819,8 @@ static bool sequencer_calc_scopes(const SpaceSeq &space_sequencer,
    } break;
    case SEQ_DRAW_IMG_RGBPARADE:
      if (!scopes->sep_waveform_ibuf) {
-        scopes->sep_waveform_ibuf = sequencer_make_scope(
-            view_settings, display_settings, ibuf, make_sep_waveform_view_from_ibuf);
+        scopes->sep_waveform_ibuf = make_sep_waveform_view_from_ibuf(
+            &ibuf, view_settings, display_settings);
      }
      break;
    default: /* Future files might have scopes we don't know about. */
--- a/source/blender/editors/space_sequencer/sequencer_scopes.cc
+++ b/source/blender/editors/space_sequencer/sequencer_scopes.cc
@@ -105,7 +105,48 @@ static void init_wave_table(int height, uchar wtable[256])
  }
 }

-ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf)
+static void rgba_float_to_display_space(ColormanageProcessor *processor,
+                                        const ColorSpace *src_colorspace,
+                                        MutableSpan<float4> pixels)
+{
+  IMB_colormanagement_colorspace_to_scene_linear(
+      &pixels.data()->x, pixels.size(), 1, 4, src_colorspace, false);
+  IMB_colormanagement_processor_apply(processor, &pixels.data()->x, pixels.size(), 1, 4, false);
+}
+
+static Array<float4> pixels_to_display_space(ColormanageProcessor *processor,
+                                             const ColorSpace *src_colorspace,
+                                             int64_t num,
+                                             const float *src,
+                                             int64_t stride)
+{
+  Array<float4> result(num, NoInitialization());
+  for (int64_t i : result.index_range()) {
+    premul_to_straight_v4_v4(result[i], src);
+    src += stride;
+  }
+  rgba_float_to_display_space(processor, src_colorspace, result);
+  return result;
+}
+
+static Array<float4> pixels_to_display_space(ColormanageProcessor *processor,
+                                             const ColorSpace *src_colorspace,
+                                             int64_t num,
+                                             const uchar *src,
+                                             int64_t stride)
+{
+  Array<float4> result(num, NoInitialization());
+  for (int64_t i : result.index_range()) {
+    rgba_uchar_to_float(result[i], src);
+    src += stride;
+  }
+  rgba_float_to_display_space(processor, src_colorspace, result);
+  return result;
+}
+
+ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf,
+                                    const ColorManagedViewSettings &view_settings,
+                                    const ColorManagedDisplaySettings &display_settings)
 {
 #ifdef DEBUG_TIME
  SCOPED_TIMER(__func__);
@@ -118,6 +159,9 @@ ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf)
  uchar wtable[256];
  init_wave_table(ibuf->y, wtable);

+  ColormanageProcessor *cm_processor = IMB_colormanagement_display_processor_for_imbuf(
+      ibuf, &view_settings, &display_settings);
+
  /* IMB_colormanagement_get_luminance_byte for each pixel is quite a lot of
   * overhead, so instead get luma coefficients as 16-bit integers. */
  float coeffs[3];
@@ -125,46 +169,75 @@ ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf)
  const int muls[3] = {int(coeffs[0] * 65535), int(coeffs[1] * 65535), int(coeffs[2] * 65535)};

  /* Parallel over x, since each column is easily independent from others. */
-  threading::parallel_for(IndexRange(ibuf->x), 32, [&](IndexRange x_range) {
+  threading::parallel_for_each(IndexRange(ibuf->x), [&](const int x) {
    if (ibuf->float_buffer.data) {
-      /* Float image. */
-      const float *src = ibuf->float_buffer.data;
-      for (int y = 0; y < ibuf->y; y++) {
-        for (const int x : x_range) {
-          const float *rgb = src + 4 * (ibuf->x * y + x);
-          float v = IMB_colormanagement_get_luminance(rgb);
+      const float *src = ibuf->float_buffer.data + x * 4;
+      if (!cm_processor) {
+        /* Float image, no color space conversions needed. */
+        for (int y = 0; y < ibuf->y; y++) {
+          float4 pixel;
+          premul_to_straight_v4_v4(pixel, src);
+          float v = dot_v3v3(pixel, coeffs);
+          uchar *p = tgt;
+          int iv = clamp_i(int(v * h), 0, h - 1);
+          p += 4 * (w * iv + x);
+          scope_put_pixel(wtable, p);
+          src += ibuf->x * 4;
+        }
+      }
+      else {
+        /* Float image, with color space conversions. */
+        Array<float4> pixels = pixels_to_display_space(
+            cm_processor, ibuf->float_buffer.colorspace, ibuf->y, src, ibuf->x * 4);
+        for (int y = 0; y < ibuf->y; y++) {
+          float v = dot_v3v3(pixels[y], coeffs);
          uchar *p = tgt;
-
          int iv = clamp_i(int(v * h), 0, h - 1);
-
          p += 4 * (w * iv + x);
          scope_put_pixel(wtable, p);
        }
      }
    }
    else {
-      /* Byte image. */
-      const uchar *src = ibuf->byte_buffer.data;
-      for (int y = 0; y < ibuf->y; y++) {
-        for (const int x : x_range) {
-          const uchar *rgb = src + 4 * (ibuf->x * y + x);
+      const uchar *src = ibuf->byte_buffer.data + x * 4;
+      if (!cm_processor) {
+        /* Byte image, no color space conversions needed. */
+        for (int y = 0; y < ibuf->y; y++) {
          /* +1 is "Sree's solution" from http://stereopsis.com/doubleblend.html */
-          int rgb0 = rgb[0] + 1;
-          int rgb1 = rgb[1] + 1;
-          int rgb2 = rgb[2] + 1;
+          int rgb0 = src[0] + 1;
+          int rgb1 = src[1] + 1;
+          int rgb2 = src[2] + 1;
          int luma = (rgb0 * muls[0] + rgb1 * muls[1] + rgb2 * muls[2]) >> 16;
          int luma_y = clamp_i(luma, 0, 255);
          uchar *p = tgt + 4 * (w * luma_y + x);
          scope_put_pixel(wtable, p);
+          src += ibuf->x * 4;
+        }
+      }
+      else {
+        /* Byte image, with color space conversions. */
+        Array<float4> pixels = pixels_to_display_space(
+            cm_processor, ibuf->byte_buffer.colorspace, ibuf->y, src, ibuf->x * 4);
+        for (int y = 0; y < ibuf->y; y++) {
+          float v = dot_v3v3(pixels[y], coeffs);
+          uchar *p = tgt;
+          int iv = clamp_i(int(v * h), 0, h - 1);
+          p += 4 * (w * iv + x);
+          scope_put_pixel(wtable, p);
        }
      }
    }
  });

+  if (cm_processor) {
+    IMB_colormanagement_processor_free(cm_processor);
+  }
  return rval;
 }

-ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf)
+ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf,
+                                        const ColorManagedViewSettings &view_settings,
+                                        const ColorManagedDisplaySettings &display_settings)
 {
 #ifdef DEBUG_TIME
  SCOPED_TIMER(__func__);
@@ -178,19 +251,38 @@ ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf)
  uchar wtable[256];
  init_wave_table(ibuf->y, wtable);

+  ColormanageProcessor *cm_processor = IMB_colormanagement_display_processor_for_imbuf(
+      ibuf, &view_settings, &display_settings);
+
  /* Parallel over x, since each column is easily independent from others. */
-  threading::parallel_for(IndexRange(ibuf->x), 32, [&](IndexRange x_range) {
+  threading::parallel_for_each(IndexRange(ibuf->x), [&](const int x) {
    if (ibuf->float_buffer.data) {
-      /* Float image. */
-      const float *src = ibuf->float_buffer.data;
-      for (int y = 0; y < ibuf->y; y++) {
-        for (const int x : x_range) {
-          const float *rgb = src + 4 * (ibuf->x * y + x);
+      const float *src = ibuf->float_buffer.data + x * 4;
+      if (!cm_processor) {
+        /* Float image, no color space conversions needed. */
+        for (int y = 0; y < ibuf->y; y++) {
+          float4 pixel;
+          premul_to_straight_v4_v4(pixel, src);
          for (int c = 0; c < 3; c++) {
            uchar *p = tgt;
-            float v = rgb[c];
+            float v = pixel[c];
+            int iv = clamp_i(int(v * h), 0, h - 1);
+            p += 4 * (w * iv + c * sw + x / 3);
+            scope_put_pixel_single(wtable, p, c);
+          }
+          src += ibuf->x * 4;
+        }
+      }
+      else {
+        /* Float image, with color space conversions. */
+        Array<float4> pixels = pixels_to_display_space(
+            cm_processor, ibuf->float_buffer.colorspace, ibuf->y, src, ibuf->x * 4);
+        for (int y = 0; y < ibuf->y; y++) {
+          float4 pixel = pixels[y];
+          for (int c = 0; c < 3; c++) {
+            uchar *p = tgt;
+            float v = pixel[c];
            int iv = clamp_i(int(v * h), 0, h - 1);
-
            p += 4 * (w * iv + c * sw + x / 3);
            scope_put_pixel_single(wtable, p, c);
          }
@@ -198,14 +290,29 @@ ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf)
      }
    }
    else {
-      /* Byte image. */
-      const uchar *src = ibuf->byte_buffer.data;
-      for (int y = 0; y < ibuf->y; y++) {
-        for (const int x : x_range) {
-          const uchar *rgb = src + 4 * (ibuf->x * y + x);
+      const uchar *src = ibuf->byte_buffer.data + x * 4;
+      if (!cm_processor) {
+        /* Byte image, no color space conversions needed. */
+        for (int y = 0; y < ibuf->y; y++) {
          for (int c = 0; c < 3; c++) {
            uchar *p = tgt;
-            p += 4 * (w * rgb[c] + c * sw + x / 3);
+            p += 4 * (w * src[c] + c * sw + x / 3);
+            scope_put_pixel_single(wtable, p, c);
+          }
+          src += ibuf->x * 4;
+        }
+      }
+      else {
+        /* Byte image, with color space conversions. */
+        Array<float4> pixels = pixels_to_display_space(
+            cm_processor, ibuf->byte_buffer.colorspace, ibuf->y, src, ibuf->x * 4);
+        for (int y = 0; y < ibuf->y; y++) {
+          float4 pixel = pixels[y];
+          for (int c = 0; c < 3; c++) {
+            uchar *p = tgt;
+            float v = pixel[c];
+            int iv = clamp_i(int(v * h), 0, h - 1);
+            p += 4 * (w * iv + c * sw + x / 3);
            scope_put_pixel_single(wtable, p, c);
          }
        }
@@ -213,6 +320,9 @@ ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf)
    }
  });

+  if (cm_processor) {
+    IMB_colormanagement_processor_free(cm_processor);
+  }
  return rval;
 }

@@ -318,14 +428,8 @@ void ScopeHistogram::calc_from_ibuf(const ImBuf *ibuf,
          }
          else {
            /* Float image, with color space conversions. */
-            Array<float4> pixels(range.size(), NoInitialization());
-            for (int64_t i : pixels.index_range()) {
-              premul_to_straight_v4_v4(pixels[i], src + i * 4);
-            }
-            IMB_colormanagement_colorspace_to_scene_linear(
-                &pixels.data()->x, pixels.size(), 1, 4, ibuf->float_buffer.colorspace, false);
-            IMB_colormanagement_processor_apply(
-                cm_processor, &pixels.data()->x, pixels.size(), 1, 4, false);
+            Array<float4> pixels = pixels_to_display_space(
+                cm_processor, ibuf->float_buffer.colorspace, range.size(), src, 4);
            for (const float4 &pixel : pixels) {
              res[get_bin_float(pixel.x)].x++;
              res[get_bin_float(pixel.y)].y++;
@@ -347,14 +451,8 @@ void ScopeHistogram::calc_from_ibuf(const ImBuf *ibuf,
          }
          else {
            /* Byte image, with color space conversions. */
-            Array<float4> pixels(range.size(), NoInitialization());
-            for (int64_t i : pixels.index_range()) {
-              rgba_uchar_to_float(pixels[i], src + i * 4);
-            }
-            IMB_colormanagement_colorspace_to_scene_linear(
-                &pixels.data()->x, pixels.size(), 1, 4, ibuf->byte_buffer.colorspace, false);
-            IMB_colormanagement_processor_apply(
-                cm_processor, &pixels.data()->x, pixels.size(), 1, 4, false);
+            Array<float4> pixels = pixels_to_display_space(
+                cm_processor, ibuf->byte_buffer.colorspace, range.size(), src, 4);
            for (const float4 &pixel : pixels) {
              uchar pixel_b[4];
              rgba_float_to_uchar(pixel_b, pixel);
@@ -366,6 +464,7 @@ void ScopeHistogram::calc_from_ibuf(const ImBuf *ibuf,
        }
        return res;
      },
+      /* Merge histograms computed per-thread. */
      [&](const Array<uint3> &a, const Array<uint3> &b) {
        BLI_assert(a.size() == b.size());
        Array<uint3> res(a.size());
@@ -385,57 +484,122 @@ void ScopeHistogram::calc_from_ibuf(const ImBuf *ibuf,
  }
 }

-ImBuf *make_vectorscope_view_from_ibuf(const ImBuf *ibuf)
+ImBuf *make_vectorscope_view_from_ibuf(const ImBuf *ibuf,
+                                       const ColorManagedViewSettings &view_settings,
+                                       const ColorManagedDisplaySettings &display_settings)
 {
 #ifdef DEBUG_TIME
  SCOPED_TIMER(__func__);
 #endif
-  const int size = 512;
+  constexpr int size = 512;
  const float size_mul = size - 1.0f;
-  ImBuf *rval = IMB_allocImBuf(size, size, 32, IB_byte_data);

-  uchar *dst = rval->byte_buffer.data;
-  float rgb[3];
+  ColormanageProcessor *cm_processor = IMB_colormanagement_display_processor_for_imbuf(
+      ibuf, &view_settings, &display_settings);

+  const bool is_float = ibuf->float_buffer.data != nullptr;
+  /* Vector scope is calculated by scattering writes into the resulting scope image. Do it with
+   * parallel reduce, by filling a separate image per job and merging them. Since the payload
+   * of each job is fairly large, make the jobs large enough too. */
+  constexpr int64_t grain_size = 256 * 1024;
+  Array<uint8_t> counts(size * size, uint8_t(0));
+  Array<uint8_t> data = threading::parallel_reduce(
+      IndexRange(IMB_get_pixel_count(ibuf)),
+      grain_size,
+      counts,
+      [&](const IndexRange range, const Array<uint8_t> &init) {
+        Array<uint8_t> res = init;
+
+        const float *src_f = is_float ? ibuf->float_buffer.data + range.first() * 4 : nullptr;
+        const uchar *src_b = !is_float ? ibuf->byte_buffer.data + range.first() * 4 : nullptr;
+        if (cm_processor) {
+          /* Byte or float image, color space conversions needed. Do them in smaller chunks
+           * than the whole job size, so they fit into CPU cache and can be on the stack. */
+          constexpr int64_t chunk_size = 4 * 1024;
+          float4 pixels[chunk_size];
+          for (int64_t index = 0; index < range.size(); index += chunk_size) {
+            const int64_t sub_size = std::min(range.size() - index, chunk_size);
+            if (is_float) {
+              for (int64_t i = 0; i < sub_size; i++) {
+                premul_to_straight_v4_v4(pixels[i], src_f);
+                src_f += 4;
+              }
+            }
+            else {
+              for (int64_t i = 0; i < sub_size; i++) {
+                rgba_uchar_to_float(pixels[i], src_b);
+                src_b += 4;
+              }
+            }
+            MutableSpan<float4> pixels_span = MutableSpan<float4>(pixels, sub_size);
+            rgba_float_to_display_space(cm_processor,
+                                        is_float ? ibuf->float_buffer.colorspace :
+                                                   ibuf->byte_buffer.colorspace,
+                                        pixels_span);
+            for (float4 pixel : pixels_span) {
+              clamp_v3(pixel, 0.0f, 1.0f);
+              float2 uv = rgb_to_uv_normalized(pixel) * size_mul;
+              int offset = size * int(uv.y) + int(uv.x);
+              res[offset] = std::min<int>(res[offset] + 1, 255);
+            }
+          }
+        }
+        else if (is_float) {
+          /* Float image, no color space conversions needed. */
+          for ([[maybe_unused]] const int64_t index : range) {
+            float4 pixel;
+            premul_to_straight_v4_v4(pixel, src_f);
+            clamp_v3(pixel, 0.0f, 1.0f);
+            float2 uv = rgb_to_uv_normalized(pixel) * size_mul;
+            int offset = size * int(uv.y) + int(uv.x);
+            res[offset] = std::min<int>(res[offset] + 1, 255);
+            src_f += 4;
+          }
+        }
+        else {
+          /* Byte image, no color space conversions needed. */
+          for ([[maybe_unused]] const int64_t index : range) {
+            float4 pixel;
+            rgb_uchar_to_float(pixel, src_b);
+            float2 uv = rgb_to_uv_normalized(pixel) * size_mul;
+            int offset = size * int(uv.y) + int(uv.x);
+            res[offset] = std::min<int>(res[offset] + 1, 255);
+            src_b += 4;
+          }
+        }
+        return res;
+      },
+      /* Merge scopes computed per-thread. */
+      [&](const Array<uint8_t> &a, const Array<uint8_t> &b) {
+        BLI_assert(a.size() == b.size());
+        Array<uint8_t> res(a.size(), NoInitialization());
+        for (int64_t i : a.index_range()) {
+          res[i] = std::min<int>(a[i] + b[i], 255);
+        }
+        return res;
+      });
+
+  /* Fill the vector scope image from the computed data. */
  uchar wtable[256];
  init_wave_table(math::midpoint(ibuf->x, ibuf->y), wtable);

-  if (ibuf->float_buffer.data) {
-    /* Float image. */
-    const float *src = ibuf->float_buffer.data;
-    for (int y = 0; y < ibuf->y; y++) {
-      for (int x = 0; x < ibuf->x; x++) {
-        memcpy(rgb, src, sizeof(float[3]));
-        clamp_v3(rgb, 0.0f, 1.0f);
-
-        float2 uv = rgb_to_uv_normalized(rgb) * size_mul;
-
-        uchar *p = dst + 4 * (size * int(uv.y) + int(uv.x));
-        scope_put_pixel(wtable, p);
-
-        src += 4;
-      }
-    }
-  }
-  else {
-    /* Byte image. */
-    const uchar *src = ibuf->byte_buffer.data;
-    for (int y = 0; y < ibuf->y; y++) {
-      for (int x = 0; x < ibuf->x; x++) {
-        rgb[0] = float(src[0]) * (1.0f / 255.0f);
-        rgb[1] = float(src[1]) * (1.0f / 255.0f);
-        rgb[2] = float(src[2]) * (1.0f / 255.0f);
-
-        float2 uv = rgb_to_uv_normalized(rgb) * size_mul;
-
-        uchar *p = dst + 4 * (size * int(uv.y) + int(uv.x));
-        scope_put_pixel(wtable, p);
-
-        src += 4;
-      }
+  ImBuf *rval = IMB_allocImBuf(size, size, 32, IB_byte_data | IB_uninitialized_pixels);
+  uchar *dst = rval->byte_buffer.data;
+  for (int i = 0; i < size * size; i++) {
+    uint8_t val = data[i];
+    if (val != 0) {
+      val = wtable[val];
    }
+    dst[0] = val;
+    dst[1] = val;
+    dst[2] = val;
+    dst[3] = 255;
+    dst += 4;
  }

+  if (cm_processor) {
+    IMB_colormanagement_processor_free(cm_processor);
+  }
  return rval;
 }

--- a/source/blender/editors/space_sequencer/sequencer_scopes.hh
+++ b/source/blender/editors/space_sequencer/sequencer_scopes.hh
@@ -55,9 +55,15 @@ struct SeqScopes : public NonCopyable {
  void cleanup();
 };

-ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf);
-ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf);
-ImBuf *make_vectorscope_view_from_ibuf(const ImBuf *ibuf);
+ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf,
+                                    const ColorManagedViewSettings &view_settings,
+                                    const ColorManagedDisplaySettings &display_settings);
+ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf,
+                                        const ColorManagedViewSettings &view_settings,
+                                        const ColorManagedDisplaySettings &display_settings);
+ImBuf *make_vectorscope_view_from_ibuf(const ImBuf *ibuf,
+                                       const ColorManagedViewSettings &view_settings,
+                                       const ColorManagedDisplaySettings &display_settings);
 ImBuf *make_zebra_view_from_ibuf(const ImBuf *ibuf, float perc);

 }  // namespace blender::ed::vse