VSE: speed up "show overexposed" option

Speed up the "apply zebra stripes" image loop by multi-threading it. For non-float images, avoid an extra image copy that was not doing anything useful.

Timings at 4K UHD resolution, Windows, Ryzen 5950X:
- LDR: whole `sequencer_get_scope` 16.4ms -> 5.3ms, just the `draw_zebra` part: 7.5ms -> 3.3ms.
- Float image: whole `sequencer_get_scope` 126.6ms -> 114.1ms, just the `draw_zebra` part: 22.4ms -> 7.4ms. The whole scope is still expensive due to the color management work being done.

Pull Request: https://projects.blender.org/blender/blender/pulls/115622
2023-12-02 08:09:49 +01:00
parent 7aa3d967ba
commit 854840b35f
2 changed files with 55 additions and 46 deletions
--- a/source/blender/editors/space_sequencer/sequencer_preview_draw.cc
+++ b/source/blender/editors/space_sequencer/sequencer_preview_draw.cc
@@ -569,14 +569,17 @@ static ImBuf *sequencer_get_scope(Scene *scene, SpaceSeq *sseq, ImBuf *ibuf, boo
    switch (sseq->mainb) {
      case SEQ_DRAW_IMG_IMBUF:
        if (!scopes->zebra_ibuf) {
-          ImBuf *display_ibuf = IMB_dupImBuf(ibuf);

-          if (display_ibuf->float_buffer.data) {
+          if (ibuf->float_buffer.data) {
+            ImBuf *display_ibuf = IMB_dupImBuf(ibuf);
            IMB_colormanagement_imbuf_make_display_space(
                display_ibuf, &scene->view_settings, &scene->display_settings);
+            scopes->zebra_ibuf = make_zebra_view_from_ibuf(display_ibuf, sseq->zebra);
+            IMB_freeImBuf(display_ibuf);
+          }
+          else {
+            scopes->zebra_ibuf = make_zebra_view_from_ibuf(ibuf, sseq->zebra);
          }
-          scopes->zebra_ibuf = make_zebra_view_from_ibuf(display_ibuf, sseq->zebra);
-          IMB_freeImBuf(display_ibuf);
        }
        scope = scopes->zebra_ibuf;
        break;
--- a/source/blender/editors/space_sequencer/sequencer_scopes.cc
+++ b/source/blender/editors/space_sequencer/sequencer_scopes.cc
@@ -362,63 +362,69 @@ ImBuf *make_sep_waveform_view_from_ibuf(ImBuf *ibuf)

 static void draw_zebra_byte(const ImBuf *src, ImBuf *ibuf, float perc)
 {
+#ifdef DEBUG_TIME
+  SCOPED_TIMER_AVERAGED(__func__);
+#endif
+  using namespace blender;
  uint limit = 255.0f * perc / 100.0f;
-  const uchar *p = src->byte_buffer.data;
-  uchar *o = ibuf->byte_buffer.data;
-  int x;
-  int y;

-  for (y = 0; y < ibuf->y; y++) {
-    for (x = 0; x < ibuf->x; x++) {
-      uchar r = *p++;
-      uchar g = *p++;
-      uchar b = *p++;
-      uchar a = *p++;
+  threading::parallel_for(IndexRange(ibuf->y), 16, [&](IndexRange y_range) {
+    const uchar *p = src->byte_buffer.data + y_range.first() * ibuf->x * 4;
+    uchar *o = ibuf->byte_buffer.data + y_range.first() * ibuf->x * 4;
+    for (const int y : y_range) {
+      for (int x = 0; x < ibuf->x; x++) {
+        uchar r = *p++;
+        uchar g = *p++;
+        uchar b = *p++;
+        uchar a = *p++;

-      if (r >= limit || g >= limit || b >= limit) {
-        if (((x + y) & 0x08) != 0) {
-          r = 255 - r;
-          g = 255 - g;
-          b = 255 - b;
+        if (r >= limit || g >= limit || b >= limit) {
+          if (((x + y) & 0x08) != 0) {
+            r = 255 - r;
+            g = 255 - g;
+            b = 255 - b;
+          }
        }
+        *o++ = r;
+        *o++ = g;
+        *o++ = b;
+        *o++ = a;
      }
-      *o++ = r;
-      *o++ = g;
-      *o++ = b;
-      *o++ = a;
    }
-  }
+  });
 }

 static void draw_zebra_float(ImBuf *src, ImBuf *ibuf, float perc)
 {
+#ifdef DEBUG_TIME
+  SCOPED_TIMER_AVERAGED(__func__);
+#endif
+  using namespace blender;
+
  float limit = perc / 100.0f;
-  const float *p = src->float_buffer.data;
-  uchar *o = ibuf->byte_buffer.data;
-  int x;
-  int y;

-  for (y = 0; y < ibuf->y; y++) {
-    for (x = 0; x < ibuf->x; x++) {
-      float r = *p++;
-      float g = *p++;
-      float b = *p++;
-      float a = *p++;
-
-      if (r >= limit || g >= limit || b >= limit) {
-        if (((x + y) & 0x08) != 0) {
-          r = -r;
-          g = -g;
-          b = -b;
+  threading::parallel_for(IndexRange(ibuf->y), 16, [&](IndexRange y_range) {
+    const float *p = src->float_buffer.data + y_range.first() * ibuf->x * 4;
+    uchar *o = ibuf->byte_buffer.data + y_range.first() * ibuf->x * 4;
+    for (const int y : y_range) {
+      for (int x = 0; x < ibuf->x; x++) {
+        float pix[4];
+        pix[0] = *p++;
+        pix[1] = *p++;
+        pix[2] = *p++;
+        pix[3] = *p++;
+        if (pix[0] >= limit || pix[1] >= limit || pix[2] >= limit) {
+          if (((x + y) & 0x08) != 0) {
+            pix[0] = -pix[0];
+            pix[1] = -pix[1];
+            pix[2] = -pix[2];
+          }
        }
+        rgba_float_to_uchar(o, pix);
+        o += 4;
      }
-
-      *o++ = unit_float_to_uchar_clamp(r);
-      *o++ = unit_float_to_uchar_clamp(g);
-      *o++ = unit_float_to_uchar_clamp(b);
-      *o++ = unit_float_to_uchar_clamp(a);
    }
-  }
+  });
 }

 ImBuf *make_zebra_view_from_ibuf(ImBuf *ibuf, float perc)