From 8222cb0cfd2af22c44faa459cdc82dc9d1ccd4d3 Mon Sep 17 00:00:00 2001
From: Aras Pranckevicius <aras@nesnausk.org>
Date: Wed, 6 Aug 2025 16:24:05 +0200
Subject: [PATCH] VSE: Faster waveform & vectorscope calculation

Waveform, Parade and Vectorscopes were calculated by copying the
rendered image, transforming it into display space, and calculating
the scope from that. On large resolutions, this copy+transform+free
of the image was taking up the majority of the time, especially in the
default case when the display transform is a no-op.
Change the code so that display transform, if needed, is done directly
inside scope calculation, without needing a full-size temporary image.

Additionally, the vectorscope calculation was single threaded.
Multi-thread it by doing a parallel reduction, where each job
calculates its own scope image, and the results are merged. Since the
job payload is fairly large (512x512 bytes), make jobs pretty large
(256k pixels each).

Time (in ms) taken to calculate scope at 4K resolution (Ryzen 5950X,
Windows). Default color management settings:

- Waveform, PNG/SDR: 5.5 -> 5.2
- Waveform, EXR/HDR: 33.5 -> 10.3
- Vectorscope, PNG/SDR: 32.4 -> 4.5
- Vectorscope, EXR/HDR: 53.2 -> 9.8

Timings when additional color space management is needed (display
space set to Display P3, sequencer kept at sRGB):

- Waveform, PNG/SDR: 29.5 -> 10.9
- Waveform, EXR/HDR: 67.6 -> 10.9
- Vectorscope, PNG/SDR: 56.8 -> 12.0
- Vectorscope, EXR/HDR: 85.9 -> 13.4

This also fixes calculation of waveform / vectorscope on float (HDR)
images that have an alpha channel; the scope was wrongly calculated on
premultiplied color values, which was not consistent with how it was
calculated on the byte images.

Pull Request: https://projects.blender.org/blender/blender/pulls/144059
---
 source/blender/blenlib/BLI_math_color.h       |   8 +-
 source/blender/blenlib/intern/math_color.cc   |  25 --
 .../blenlib/intern/math_color_inline.cc       |  25 ++
 .../space_sequencer/sequencer_preview_draw.cc |  29 +-
 .../space_sequencer/sequencer_scopes.cc       | 338 +++++++++++++-----
 .../space_sequencer/sequencer_scopes.hh       |  12 +-
 6 files changed, 295 insertions(+), 142 deletions(-)

diff --git a/source/blender/blenlib/BLI_math_color.h b/source/blender/blenlib/BLI_math_color.h
index 11ab7aa6fb4..a878d93b2d5 100644
--- a/source/blender/blenlib/BLI_math_color.h
+++ b/source/blender/blenlib/BLI_math_color.h
@@ -137,10 +137,10 @@ void rgb_float_set_hue_float_offset(float rgb[3], float hue_offset);
  */
 void rgb_byte_set_hue_float_offset(unsigned char rgb[3], float hue_offset);
 
-void rgb_uchar_to_float(float r_col[3], const unsigned char col_ub[3]);
-void rgba_uchar_to_float(float r_col[4], const unsigned char col_ub[4]);
-void rgb_float_to_uchar(unsigned char r_col[3], const float col_f[3]);
-void rgba_float_to_uchar(unsigned char r_col[4], const float col_f[4]);
+MINLINE void rgb_uchar_to_float(float r_col[3], const unsigned char col_ub[3]);
+MINLINE void rgba_uchar_to_float(float r_col[4], const unsigned char col_ub[4]);
+MINLINE void rgb_float_to_uchar(unsigned char r_col[3], const float col_f[3]);
+MINLINE void rgba_float_to_uchar(unsigned char r_col[4], const float col_f[4]);
 
 /**
  * Compute luminance using Rec.709 primaries, for sRGB and linear Rec.709.
diff --git a/source/blender/blenlib/intern/math_color.cc b/source/blender/blenlib/intern/math_color.cc
index ac7a1a6b87f..55e31d933da 100644
--- a/source/blender/blenlib/intern/math_color.cc
+++ b/source/blender/blenlib/intern/math_color.cc
@@ -393,31 +393,6 @@ void cpack_to_rgb(uint col, float *r_r, float *r_g, float *r_b)
   *r_b = float((col >> 16) & 0xFF) * (1.0f / 255.0f);
 }
 
-void rgb_uchar_to_float(float r_col[3], const uchar col_ub[3])
-{
-  r_col[0] = float(col_ub[0]) * (1.0f / 255.0f);
-  r_col[1] = float(col_ub[1]) * (1.0f / 255.0f);
-  r_col[2] = float(col_ub[2]) * (1.0f / 255.0f);
-}
-
-void rgba_uchar_to_float(float r_col[4], const uchar col_ub[4])
-{
-  r_col[0] = float(col_ub[0]) * (1.0f / 255.0f);
-  r_col[1] = float(col_ub[1]) * (1.0f / 255.0f);
-  r_col[2] = float(col_ub[2]) * (1.0f / 255.0f);
-  r_col[3] = float(col_ub[3]) * (1.0f / 255.0f);
-}
-
-void rgb_float_to_uchar(uchar r_col[3], const float col_f[3])
-{
-  unit_float_to_uchar_clamp_v3(r_col, col_f);
-}
-
-void rgba_float_to_uchar(uchar r_col[4], const float col_f[4])
-{
-  unit_float_to_uchar_clamp_v4(r_col, col_f);
-}
-
 /* ********************************* color transforms ********************************* */
 
 float srgb_to_linearrgb(float c)
diff --git a/source/blender/blenlib/intern/math_color_inline.cc b/source/blender/blenlib/intern/math_color_inline.cc
index f63dd9889e6..46536cfb491 100644
--- a/source/blender/blenlib/intern/math_color_inline.cc
+++ b/source/blender/blenlib/intern/math_color_inline.cc
@@ -145,6 +145,31 @@ MINLINE void srgb_to_linearrgb_uchar4_predivide(float linear[4], const unsigned
   srgb_to_linearrgb_predivide_v4(linear, fsrgb);
 }
 
+MINLINE void rgb_uchar_to_float(float r_col[3], const uchar col_ub[3])
+{
+  r_col[0] = float(col_ub[0]) * (1.0f / 255.0f);
+  r_col[1] = float(col_ub[1]) * (1.0f / 255.0f);
+  r_col[2] = float(col_ub[2]) * (1.0f / 255.0f);
+}
+
+MINLINE void rgba_uchar_to_float(float r_col[4], const uchar col_ub[4])
+{
+  r_col[0] = float(col_ub[0]) * (1.0f / 255.0f);
+  r_col[1] = float(col_ub[1]) * (1.0f / 255.0f);
+  r_col[2] = float(col_ub[2]) * (1.0f / 255.0f);
+  r_col[3] = float(col_ub[3]) * (1.0f / 255.0f);
+}
+
+MINLINE void rgb_float_to_uchar(uchar r_col[3], const float col_f[3])
+{
+  unit_float_to_uchar_clamp_v3(r_col, col_f);
+}
+
+MINLINE void rgba_float_to_uchar(uchar r_col[4], const float col_f[4])
+{
+  unit_float_to_uchar_clamp_v4(r_col, col_f);
+}
+
 MINLINE void rgba_uchar_args_set(
     uchar col[4], const uchar r, const uchar g, const uchar b, const uchar a)
 {
diff --git a/source/blender/editors/space_sequencer/sequencer_preview_draw.cc b/source/blender/editors/space_sequencer/sequencer_preview_draw.cc
index d5202a8e1df..5a1aa6287e8 100644
--- a/source/blender/editors/space_sequencer/sequencer_preview_draw.cc
+++ b/source/blender/editors/space_sequencer/sequencer_preview_draw.cc
@@ -171,23 +171,6 @@ ImBuf *sequencer_ibuf_get(const bContext *C, const int timeline_frame, const cha
   return ibuf;
 }
 
-static ImBuf *sequencer_make_scope(const ColorManagedViewSettings &view_settings,
-                                   const ColorManagedDisplaySettings &display_settings,
-                                   const ImBuf &ibuf,
-                                   ImBuf *(*make_scope_fn)(const ImBuf *ibuf))
-{
-  ImBuf *display_ibuf = IMB_dupImBuf(&ibuf);
-  ImBuf *scope;
-
-  IMB_colormanagement_imbuf_make_display_space(display_ibuf, &view_settings, &display_settings);
-
-  scope = make_scope_fn(display_ibuf);
-
-  IMB_freeImBuf(display_ibuf);
-
-  return scope;
-}
-
 static void sequencer_display_size(const RenderData &render_data, float r_viewrect[2])
 {
   r_viewrect[0] = float(render_data.xsch);
@@ -821,14 +804,14 @@ static bool sequencer_calc_scopes(const SpaceSeq &space_sequencer,
       break;
     case SEQ_DRAW_IMG_WAVEFORM:
       if (!scopes->waveform_ibuf) {
-        scopes->waveform_ibuf = sequencer_make_scope(
-            view_settings, display_settings, ibuf, make_waveform_view_from_ibuf);
+        scopes->waveform_ibuf = make_waveform_view_from_ibuf(
+            &ibuf, view_settings, display_settings);
       }
       break;
     case SEQ_DRAW_IMG_VECTORSCOPE:
       if (!scopes->vector_ibuf) {
-        scopes->vector_ibuf = sequencer_make_scope(
-            view_settings, display_settings, ibuf, make_vectorscope_view_from_ibuf);
+        scopes->vector_ibuf = make_vectorscope_view_from_ibuf(
+            &ibuf, view_settings, display_settings);
       }
       break;
     case SEQ_DRAW_IMG_HISTOGRAM: {
@@ -836,8 +819,8 @@ static bool sequencer_calc_scopes(const SpaceSeq &space_sequencer,
     } break;
     case SEQ_DRAW_IMG_RGBPARADE:
       if (!scopes->sep_waveform_ibuf) {
-        scopes->sep_waveform_ibuf = sequencer_make_scope(
-            view_settings, display_settings, ibuf, make_sep_waveform_view_from_ibuf);
+        scopes->sep_waveform_ibuf = make_sep_waveform_view_from_ibuf(
+            &ibuf, view_settings, display_settings);
       }
       break;
     default: /* Future files might have scopes we don't know about. */
diff --git a/source/blender/editors/space_sequencer/sequencer_scopes.cc b/source/blender/editors/space_sequencer/sequencer_scopes.cc
index 85833d54774..344cf6ac7c5 100644
--- a/source/blender/editors/space_sequencer/sequencer_scopes.cc
+++ b/source/blender/editors/space_sequencer/sequencer_scopes.cc
@@ -105,7 +105,48 @@ static void init_wave_table(int height, uchar wtable[256])
   }
 }
 
-ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf)
+static void rgba_float_to_display_space(ColormanageProcessor *processor,
+                                        const ColorSpace *src_colorspace,
+                                        MutableSpan<float4> pixels)
+{
+  IMB_colormanagement_colorspace_to_scene_linear(
+      &pixels.data()->x, pixels.size(), 1, 4, src_colorspace, false);
+  IMB_colormanagement_processor_apply(processor, &pixels.data()->x, pixels.size(), 1, 4, false);
+}
+
+static Array<float4> pixels_to_display_space(ColormanageProcessor *processor,
+                                             const ColorSpace *src_colorspace,
+                                             int64_t num,
+                                             const float *src,
+                                             int64_t stride)
+{
+  Array<float4> result(num, NoInitialization());
+  for (int64_t i : result.index_range()) {
+    premul_to_straight_v4_v4(result[i], src);
+    src += stride;
+  }
+  rgba_float_to_display_space(processor, src_colorspace, result);
+  return result;
+}
+
+static Array<float4> pixels_to_display_space(ColormanageProcessor *processor,
+                                             const ColorSpace *src_colorspace,
+                                             int64_t num,
+                                             const uchar *src,
+                                             int64_t stride)
+{
+  Array<float4> result(num, NoInitialization());
+  for (int64_t i : result.index_range()) {
+    rgba_uchar_to_float(result[i], src);
+    src += stride;
+  }
+  rgba_float_to_display_space(processor, src_colorspace, result);
+  return result;
+}
+
+ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf,
+                                    const ColorManagedViewSettings &view_settings,
+                                    const ColorManagedDisplaySettings &display_settings)
 {
 #ifdef DEBUG_TIME
   SCOPED_TIMER(__func__);
@@ -118,6 +159,9 @@ ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf)
   uchar wtable[256];
   init_wave_table(ibuf->y, wtable);
 
+  ColormanageProcessor *cm_processor = IMB_colormanagement_display_processor_for_imbuf(
+      ibuf, &view_settings, &display_settings);
+
   /* IMB_colormanagement_get_luminance_byte for each pixel is quite a lot of
    * overhead, so instead get luma coefficients as 16-bit integers. */
   float coeffs[3];
@@ -125,46 +169,75 @@ ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf)
   const int muls[3] = {int(coeffs[0] * 65535), int(coeffs[1] * 65535), int(coeffs[2] * 65535)};
 
   /* Parallel over x, since each column is easily independent from others. */
-  threading::parallel_for(IndexRange(ibuf->x), 32, [&](IndexRange x_range) {
+  threading::parallel_for_each(IndexRange(ibuf->x), [&](const int x) {
     if (ibuf->float_buffer.data) {
-      /* Float image. */
-      const float *src = ibuf->float_buffer.data;
-      for (int y = 0; y < ibuf->y; y++) {
-        for (const int x : x_range) {
-          const float *rgb = src + 4 * (ibuf->x * y + x);
-          float v = IMB_colormanagement_get_luminance(rgb);
+      const float *src = ibuf->float_buffer.data + x * 4;
+      if (!cm_processor) {
+        /* Float image, no color space conversions needed. */
+        for (int y = 0; y < ibuf->y; y++) {
+          float4 pixel;
+          premul_to_straight_v4_v4(pixel, src);
+          float v = dot_v3v3(pixel, coeffs);
+          uchar *p = tgt;
+          int iv = clamp_i(int(v * h), 0, h - 1);
+          p += 4 * (w * iv + x);
+          scope_put_pixel(wtable, p);
+          src += ibuf->x * 4;
+        }
+      }
+      else {
+        /* Float image, with color space conversions. */
+        Array<float4> pixels = pixels_to_display_space(
+            cm_processor, ibuf->float_buffer.colorspace, ibuf->y, src, ibuf->x * 4);
+        for (int y = 0; y < ibuf->y; y++) {
+          float v = dot_v3v3(pixels[y], coeffs);
           uchar *p = tgt;
-
           int iv = clamp_i(int(v * h), 0, h - 1);
-
           p += 4 * (w * iv + x);
           scope_put_pixel(wtable, p);
         }
       }
     }
     else {
-      /* Byte image. */
-      const uchar *src = ibuf->byte_buffer.data;
-      for (int y = 0; y < ibuf->y; y++) {
-        for (const int x : x_range) {
-          const uchar *rgb = src + 4 * (ibuf->x * y + x);
+      const uchar *src = ibuf->byte_buffer.data + x * 4;
+      if (!cm_processor) {
+        /* Byte image, no color space conversions needed. */
+        for (int y = 0; y < ibuf->y; y++) {
           /* +1 is "Sree's solution" from http://stereopsis.com/doubleblend.html */
-          int rgb0 = rgb[0] + 1;
-          int rgb1 = rgb[1] + 1;
-          int rgb2 = rgb[2] + 1;
+          int rgb0 = src[0] + 1;
+          int rgb1 = src[1] + 1;
+          int rgb2 = src[2] + 1;
           int luma = (rgb0 * muls[0] + rgb1 * muls[1] + rgb2 * muls[2]) >> 16;
           int luma_y = clamp_i(luma, 0, 255);
           uchar *p = tgt + 4 * (w * luma_y + x);
           scope_put_pixel(wtable, p);
+          src += ibuf->x * 4;
+        }
+      }
+      else {
+        /* Byte image, with color space conversions. */
+        Array<float4> pixels = pixels_to_display_space(
+            cm_processor, ibuf->byte_buffer.colorspace, ibuf->y, src, ibuf->x * 4);
+        for (int y = 0; y < ibuf->y; y++) {
+          float v = dot_v3v3(pixels[y], coeffs);
+          uchar *p = tgt;
+          int iv = clamp_i(int(v * h), 0, h - 1);
+          p += 4 * (w * iv + x);
+          scope_put_pixel(wtable, p);
         }
       }
     }
   });
 
+  if (cm_processor) {
+    IMB_colormanagement_processor_free(cm_processor);
+  }
   return rval;
 }
 
-ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf)
+ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf,
+                                        const ColorManagedViewSettings &view_settings,
+                                        const ColorManagedDisplaySettings &display_settings)
 {
 #ifdef DEBUG_TIME
   SCOPED_TIMER(__func__);
@@ -178,19 +251,38 @@ ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf)
   uchar wtable[256];
   init_wave_table(ibuf->y, wtable);
 
+  ColormanageProcessor *cm_processor = IMB_colormanagement_display_processor_for_imbuf(
+      ibuf, &view_settings, &display_settings);
+
   /* Parallel over x, since each column is easily independent from others. */
-  threading::parallel_for(IndexRange(ibuf->x), 32, [&](IndexRange x_range) {
+  threading::parallel_for_each(IndexRange(ibuf->x), [&](const int x) {
     if (ibuf->float_buffer.data) {
-      /* Float image. */
-      const float *src = ibuf->float_buffer.data;
-      for (int y = 0; y < ibuf->y; y++) {
-        for (const int x : x_range) {
-          const float *rgb = src + 4 * (ibuf->x * y + x);
+      const float *src = ibuf->float_buffer.data + x * 4;
+      if (!cm_processor) {
+        /* Float image, no color space conversions needed. */
+        for (int y = 0; y < ibuf->y; y++) {
+          float4 pixel;
+          premul_to_straight_v4_v4(pixel, src);
           for (int c = 0; c < 3; c++) {
             uchar *p = tgt;
-            float v = rgb[c];
+            float v = pixel[c];
+            int iv = clamp_i(int(v * h), 0, h - 1);
+            p += 4 * (w * iv + c * sw + x / 3);
+            scope_put_pixel_single(wtable, p, c);
+          }
+          src += ibuf->x * 4;
+        }
+      }
+      else {
+        /* Float image, with color space conversions. */
+        Array<float4> pixels = pixels_to_display_space(
+            cm_processor, ibuf->float_buffer.colorspace, ibuf->y, src, ibuf->x * 4);
+        for (int y = 0; y < ibuf->y; y++) {
+          float4 pixel = pixels[y];
+          for (int c = 0; c < 3; c++) {
+            uchar *p = tgt;
+            float v = pixel[c];
             int iv = clamp_i(int(v * h), 0, h - 1);
-
             p += 4 * (w * iv + c * sw + x / 3);
             scope_put_pixel_single(wtable, p, c);
           }
@@ -198,14 +290,29 @@ ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf)
       }
     }
     else {
-      /* Byte image. */
-      const uchar *src = ibuf->byte_buffer.data;
-      for (int y = 0; y < ibuf->y; y++) {
-        for (const int x : x_range) {
-          const uchar *rgb = src + 4 * (ibuf->x * y + x);
+      const uchar *src = ibuf->byte_buffer.data + x * 4;
+      if (!cm_processor) {
+        /* Byte image, no color space conversions needed. */
+        for (int y = 0; y < ibuf->y; y++) {
           for (int c = 0; c < 3; c++) {
             uchar *p = tgt;
-            p += 4 * (w * rgb[c] + c * sw + x / 3);
+            p += 4 * (w * src[c] + c * sw + x / 3);
+            scope_put_pixel_single(wtable, p, c);
+          }
+          src += ibuf->x * 4;
+        }
+      }
+      else {
+        /* Byte image, with color space conversions. */
+        Array<float4> pixels = pixels_to_display_space(
+            cm_processor, ibuf->byte_buffer.colorspace, ibuf->y, src, ibuf->x * 4);
+        for (int y = 0; y < ibuf->y; y++) {
+          float4 pixel = pixels[y];
+          for (int c = 0; c < 3; c++) {
+            uchar *p = tgt;
+            float v = pixel[c];
+            int iv = clamp_i(int(v * h), 0, h - 1);
+            p += 4 * (w * iv + c * sw + x / 3);
             scope_put_pixel_single(wtable, p, c);
           }
         }
@@ -213,6 +320,9 @@ ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf)
     }
   });
 
+  if (cm_processor) {
+    IMB_colormanagement_processor_free(cm_processor);
+  }
   return rval;
 }
 
@@ -318,14 +428,8 @@ void ScopeHistogram::calc_from_ibuf(const ImBuf *ibuf,
           }
           else {
             /* Float image, with color space conversions. */
-            Array<float4> pixels(range.size(), NoInitialization());
-            for (int64_t i : pixels.index_range()) {
-              premul_to_straight_v4_v4(pixels[i], src + i * 4);
-            }
-            IMB_colormanagement_colorspace_to_scene_linear(
-                &pixels.data()->x, pixels.size(), 1, 4, ibuf->float_buffer.colorspace, false);
-            IMB_colormanagement_processor_apply(
-                cm_processor, &pixels.data()->x, pixels.size(), 1, 4, false);
+            Array<float4> pixels = pixels_to_display_space(
+                cm_processor, ibuf->float_buffer.colorspace, range.size(), src, 4);
             for (const float4 &pixel : pixels) {
               res[get_bin_float(pixel.x)].x++;
               res[get_bin_float(pixel.y)].y++;
@@ -347,14 +451,8 @@ void ScopeHistogram::calc_from_ibuf(const ImBuf *ibuf,
           }
           else {
             /* Byte image, with color space conversions. */
-            Array<float4> pixels(range.size(), NoInitialization());
-            for (int64_t i : pixels.index_range()) {
-              rgba_uchar_to_float(pixels[i], src + i * 4);
-            }
-            IMB_colormanagement_colorspace_to_scene_linear(
-                &pixels.data()->x, pixels.size(), 1, 4, ibuf->byte_buffer.colorspace, false);
-            IMB_colormanagement_processor_apply(
-                cm_processor, &pixels.data()->x, pixels.size(), 1, 4, false);
+            Array<float4> pixels = pixels_to_display_space(
+                cm_processor, ibuf->byte_buffer.colorspace, range.size(), src, 4);
             for (const float4 &pixel : pixels) {
               uchar pixel_b[4];
               rgba_float_to_uchar(pixel_b, pixel);
@@ -366,6 +464,7 @@ void ScopeHistogram::calc_from_ibuf(const ImBuf *ibuf,
         }
         return res;
       },
+      /* Merge histograms computed per-thread. */
       [&](const Array<uint3> &a, const Array<uint3> &b) {
         BLI_assert(a.size() == b.size());
         Array<uint3> res(a.size());
@@ -385,57 +484,122 @@ void ScopeHistogram::calc_from_ibuf(const ImBuf *ibuf,
   }
 }
 
-ImBuf *make_vectorscope_view_from_ibuf(const ImBuf *ibuf)
+ImBuf *make_vectorscope_view_from_ibuf(const ImBuf *ibuf,
+                                       const ColorManagedViewSettings &view_settings,
+                                       const ColorManagedDisplaySettings &display_settings)
 {
 #ifdef DEBUG_TIME
   SCOPED_TIMER(__func__);
 #endif
-  const int size = 512;
+  constexpr int size = 512;
   const float size_mul = size - 1.0f;
-  ImBuf *rval = IMB_allocImBuf(size, size, 32, IB_byte_data);
 
-  uchar *dst = rval->byte_buffer.data;
-  float rgb[3];
+  ColormanageProcessor *cm_processor = IMB_colormanagement_display_processor_for_imbuf(
+      ibuf, &view_settings, &display_settings);
 
+  const bool is_float = ibuf->float_buffer.data != nullptr;
+  /* Vector scope is calculated by scattering writes into the resulting scope image. Do it with
+   * parallel reduce, by filling a separate image per job and merging them. Since the payload
+   * of each job is fairly large, make the jobs large enough too. */
+  constexpr int64_t grain_size = 256 * 1024;
+  Array<uint8_t> counts(size * size, uint8_t(0));
+  Array<uint8_t> data = threading::parallel_reduce(
+      IndexRange(IMB_get_pixel_count(ibuf)),
+      grain_size,
+      counts,
+      [&](const IndexRange range, const Array<uint8_t> &init) {
+        Array<uint8_t> res = init;
+
+        const float *src_f = is_float ? ibuf->float_buffer.data + range.first() * 4 : nullptr;
+        const uchar *src_b = !is_float ? ibuf->byte_buffer.data + range.first() * 4 : nullptr;
+        if (cm_processor) {
+          /* Byte or float image, color space conversions needed. Do them in smaller chunks
+           * than the whole job size, so they fit into CPU cache and can be on the stack. */
+          constexpr int64_t chunk_size = 4 * 1024;
+          float4 pixels[chunk_size];
+          for (int64_t index = 0; index < range.size(); index += chunk_size) {
+            const int64_t sub_size = std::min(range.size() - index, chunk_size);
+            if (is_float) {
+              for (int64_t i = 0; i < sub_size; i++) {
+                premul_to_straight_v4_v4(pixels[i], src_f);
+                src_f += 4;
+              }
+            }
+            else {
+              for (int64_t i = 0; i < sub_size; i++) {
+                rgba_uchar_to_float(pixels[i], src_b);
+                src_b += 4;
+              }
+            }
+            MutableSpan<float4> pixels_span = MutableSpan<float4>(pixels, sub_size);
+            rgba_float_to_display_space(cm_processor,
+                                        is_float ? ibuf->float_buffer.colorspace :
+                                                   ibuf->byte_buffer.colorspace,
+                                        pixels_span);
+            for (float4 pixel : pixels_span) {
+              clamp_v3(pixel, 0.0f, 1.0f);
+              float2 uv = rgb_to_uv_normalized(pixel) * size_mul;
+              int offset = size * int(uv.y) + int(uv.x);
+              res[offset] = std::min<int>(res[offset] + 1, 255);
+            }
+          }
+        }
+        else if (is_float) {
+          /* Float image, no color space conversions needed. */
+          for ([[maybe_unused]] const int64_t index : range) {
+            float4 pixel;
+            premul_to_straight_v4_v4(pixel, src_f);
+            clamp_v3(pixel, 0.0f, 1.0f);
+            float2 uv = rgb_to_uv_normalized(pixel) * size_mul;
+            int offset = size * int(uv.y) + int(uv.x);
+            res[offset] = std::min<int>(res[offset] + 1, 255);
+            src_f += 4;
+          }
+        }
+        else {
+          /* Byte image, no color space conversions needed. */
+          for ([[maybe_unused]] const int64_t index : range) {
+            float4 pixel;
+            rgb_uchar_to_float(pixel, src_b);
+            float2 uv = rgb_to_uv_normalized(pixel) * size_mul;
+            int offset = size * int(uv.y) + int(uv.x);
+            res[offset] = std::min<int>(res[offset] + 1, 255);
+            src_b += 4;
+          }
+        }
+        return res;
+      },
+      /* Merge scopes computed per-thread. */
+      [&](const Array<uint8_t> &a, const Array<uint8_t> &b) {
+        BLI_assert(a.size() == b.size());
+        Array<uint8_t> res(a.size(), NoInitialization());
+        for (int64_t i : a.index_range()) {
+          res[i] = std::min<int>(a[i] + b[i], 255);
+        }
+        return res;
+      });
+
+  /* Fill the vector scope image from the computed data. */
   uchar wtable[256];
   init_wave_table(math::midpoint(ibuf->x, ibuf->y), wtable);
 
-  if (ibuf->float_buffer.data) {
-    /* Float image. */
-    const float *src = ibuf->float_buffer.data;
-    for (int y = 0; y < ibuf->y; y++) {
-      for (int x = 0; x < ibuf->x; x++) {
-        memcpy(rgb, src, sizeof(float[3]));
-        clamp_v3(rgb, 0.0f, 1.0f);
-
-        float2 uv = rgb_to_uv_normalized(rgb) * size_mul;
-
-        uchar *p = dst + 4 * (size * int(uv.y) + int(uv.x));
-        scope_put_pixel(wtable, p);
-
-        src += 4;
-      }
-    }
-  }
-  else {
-    /* Byte image. */
-    const uchar *src = ibuf->byte_buffer.data;
-    for (int y = 0; y < ibuf->y; y++) {
-      for (int x = 0; x < ibuf->x; x++) {
-        rgb[0] = float(src[0]) * (1.0f / 255.0f);
-        rgb[1] = float(src[1]) * (1.0f / 255.0f);
-        rgb[2] = float(src[2]) * (1.0f / 255.0f);
-
-        float2 uv = rgb_to_uv_normalized(rgb) * size_mul;
-
-        uchar *p = dst + 4 * (size * int(uv.y) + int(uv.x));
-        scope_put_pixel(wtable, p);
-
-        src += 4;
-      }
+  ImBuf *rval = IMB_allocImBuf(size, size, 32, IB_byte_data | IB_uninitialized_pixels);
+  uchar *dst = rval->byte_buffer.data;
+  for (int i = 0; i < size * size; i++) {
+    uint8_t val = data[i];
+    if (val != 0) {
+      val = wtable[val];
     }
+    dst[0] = val;
+    dst[1] = val;
+    dst[2] = val;
+    dst[3] = 255;
+    dst += 4;
   }
 
+  if (cm_processor) {
+    IMB_colormanagement_processor_free(cm_processor);
+  }
   return rval;
 }
 
diff --git a/source/blender/editors/space_sequencer/sequencer_scopes.hh b/source/blender/editors/space_sequencer/sequencer_scopes.hh
index 38a819a4ec6..0c2752d3af8 100644
--- a/source/blender/editors/space_sequencer/sequencer_scopes.hh
+++ b/source/blender/editors/space_sequencer/sequencer_scopes.hh
@@ -55,9 +55,15 @@ struct SeqScopes : public NonCopyable {
   void cleanup();
 };
 
-ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf);
-ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf);
-ImBuf *make_vectorscope_view_from_ibuf(const ImBuf *ibuf);
+ImBuf *make_waveform_view_from_ibuf(const ImBuf *ibuf,
+                                    const ColorManagedViewSettings &view_settings,
+                                    const ColorManagedDisplaySettings &display_settings);
+ImBuf *make_sep_waveform_view_from_ibuf(const ImBuf *ibuf,
+                                        const ColorManagedViewSettings &view_settings,
+                                        const ColorManagedDisplaySettings &display_settings);
+ImBuf *make_vectorscope_view_from_ibuf(const ImBuf *ibuf,
+                                       const ColorManagedViewSettings &view_settings,
+                                       const ColorManagedDisplaySettings &display_settings);
 ImBuf *make_zebra_view_from_ibuf(const ImBuf *ibuf, float perc);
 
 }  // namespace blender::ed::vse