VSE: speed up solid color effect

It's pretty simple, but threading it and making it write out a whole pixel at a time (instead of one byte at a time) still makes it faster.

4K resolution, five Color strips blended over each other, playback on Windows/VS2022, Ryzen 5950X:
- Playback: 9.2FPS -> 11.5FPS
- do_solid_color for one effect, median time: 7.7ms -> 3.8ms

Additionally, the solid color effect on byte output was not doing float->byte color rounding & clamping properly, and on float output it was writing 255.0 into alpha instead of 1.0. So fix that too.

Pull Request: https://projects.blender.org/blender/blender/pulls/117058
2024-01-12 18:04:38 +01:00
parent 303c19f736
commit 9c7b3659e2
1 changed file with 25 additions and 35 deletions
--- a/source/blender/sequencer/intern/effects.cc
+++ b/source/blender/sequencer/intern/effects.cc
@@ -1925,51 +1925,41 @@ static ImBuf *do_solid_color(const SeqRenderData *context,
                             ImBuf *ibuf2,
                             ImBuf *ibuf3)
 {
+  using namespace blender;
  ImBuf *out = prepare_effect_imbufs(context, ibuf1, ibuf2, ibuf3);

  SolidColorVars *cv = (SolidColorVars *)seq->effectdata;

-  int x = out->x;
-  int y = out->y;
+  threading::parallel_for(IndexRange(out->y), 64, [&](const IndexRange y_range) {
+    if (out->byte_buffer.data) {
+      /* Byte image. */
+      uchar color[4];
+      rgb_float_to_uchar(color, cv->col);
+      color[3] = 255;

-  if (out->byte_buffer.data) {
-    uchar color[4];
-    color[0] = cv->col[0] * 255;
-    color[1] = cv->col[1] * 255;
-    color[2] = cv->col[2] * 255;
-    color[3] = 255;
-
-    uchar *rect = out->byte_buffer.data;
-
-    for (int i = 0; i < y; i++) {
-      for (int j = 0; j < x; j++) {
-        rect[0] = color[0];
-        rect[1] = color[1];
-        rect[2] = color[2];
-        rect[3] = color[3];
-        rect += 4;
+      uchar *dst = out->byte_buffer.data + y_range.first() * out->x * 4;
+      uchar *dst_end = dst + y_range.size() * out->x * 4;
+      while (dst < dst_end) {
+        memcpy(dst, color, sizeof(color));
+        dst += 4;
      }
    }
-  }
-  else if (out->float_buffer.data) {
-    float color[4];
-    color[0] = cv->col[0];
-    color[1] = cv->col[1];
-    color[2] = cv->col[2];
-    color[3] = 255;
+    else {
+      /* Float image. */
+      float color[4];
+      color[0] = cv->col[0];
+      color[1] = cv->col[1];
+      color[2] = cv->col[2];
+      color[3] = 1.0f;

-    float *rect_float = out->float_buffer.data;
-
-    for (int i = 0; i < y; i++) {
-      for (int j = 0; j < x; j++) {
-        rect_float[0] = color[0];
-        rect_float[1] = color[1];
-        rect_float[2] = color[2];
-        rect_float[3] = color[3];
-        rect_float += 4;
+      float *dst = out->float_buffer.data + y_range.first() * out->x * 4;
+      float *dst_end = dst + y_range.size() * out->x * 4;
+      while (dst < dst_end) {
+        memcpy(dst, color, sizeof(color));
+        dst += 4;
      }
    }
-  }
+  });

  out->planes = R_IMF_PLANES_RGB;