VSE: speed up "show overexposed" option

Speed up the "apply zebra stripes" image loop by multi-threading it.
For non-float images, avoid an extra image copy that was not doing
anything useful.

4K UHD resolution, Windows Ryzen 5950X:

- LDR: whole `sequencer_get_scope` 16.4ms -> 5.3ms, just `draw_zebra`
  part: 7.5ms -> 3.3ms
- Float image: whole `sequencer_get_scope` 126.6ms -> 114.1ms, just
  `draw_zebra` part: 22.4ms -> 7.4ms. Whole scope is still expensive
  due to color management work being done.

Pull Request: https://projects.blender.org/blender/blender/pulls/115622
This commit is contained in:
Aras Pranckevicius
2023-12-02 08:09:49 +01:00
committed by Aras Pranckevicius
parent 7aa3d967ba
commit 854840b35f
2 changed files with 55 additions and 46 deletions

View File

@@ -569,14 +569,17 @@ static ImBuf *sequencer_get_scope(Scene *scene, SpaceSeq *sseq, ImBuf *ibuf, boo
switch (sseq->mainb) {
case SEQ_DRAW_IMG_IMBUF:
if (!scopes->zebra_ibuf) {
ImBuf *display_ibuf = IMB_dupImBuf(ibuf);
if (display_ibuf->float_buffer.data) {
if (ibuf->float_buffer.data) {
ImBuf *display_ibuf = IMB_dupImBuf(ibuf);
IMB_colormanagement_imbuf_make_display_space(
display_ibuf, &scene->view_settings, &scene->display_settings);
scopes->zebra_ibuf = make_zebra_view_from_ibuf(display_ibuf, sseq->zebra);
IMB_freeImBuf(display_ibuf);
}
else {
scopes->zebra_ibuf = make_zebra_view_from_ibuf(ibuf, sseq->zebra);
}
scopes->zebra_ibuf = make_zebra_view_from_ibuf(display_ibuf, sseq->zebra);
IMB_freeImBuf(display_ibuf);
}
scope = scopes->zebra_ibuf;
break;

View File

@@ -362,63 +362,69 @@ ImBuf *make_sep_waveform_view_from_ibuf(ImBuf *ibuf)
/* Writes a zebra-striped copy of `src`'s byte buffer into `ibuf`'s byte buffer.
 *
 * `perc` is rescaled to a 0..255 threshold (`limit`). For every pixel whose red, green or blue
 * value is at or above that threshold, those three channels are replaced by `255 - value` wherever
 * bit 3 of `x + y` is set. Alpha is always copied through unchanged.
 *
 * Both buffers are walked as 4 bytes per pixel, `ibuf->x` pixels per row and `ibuf->y` rows.
 * NOTE(review): `src` is indexed with `ibuf`'s dimensions, so it is assumed to have the same
 * size as `ibuf` -- confirm against the caller.
 *
 * NOTE(review): this listing appears to interleave the removed single-threaded loop with the
 * added `threading::parallel_for` version (the diff markers are missing), so several statements
 * occur twice and the braces do not balance as shown. */
static void draw_zebra_byte(const ImBuf *src, ImBuf *ibuf, float perc)
{
#ifdef DEBUG_TIME
SCOPED_TIMER_AVERAGED(__func__);
#endif
using namespace blender;
/* Percentage rescaled to the byte range; the float-to-uint conversion drops the fraction. */
uint limit = 255.0f * perc / 100.0f;
const uchar *p = src->byte_buffer.data;
uchar *o = ibuf->byte_buffer.data;
int x;
int y;
for (y = 0; y < ibuf->y; y++) {
for (x = 0; x < ibuf->x; x++) {
uchar r = *p++;
uchar g = *p++;
uchar b = *p++;
uchar a = *p++;
/* Rows are processed in ranges; 16 is presumably the grain size -- TODO confirm. */
threading::parallel_for(IndexRange(ibuf->y), 16, [&](IndexRange y_range) {
/* Start both cursors at the first row of this range (4 bytes per pixel). */
const uchar *p = src->byte_buffer.data + y_range.first() * ibuf->x * 4;
uchar *o = ibuf->byte_buffer.data + y_range.first() * ibuf->x * 4;
for (const int y : y_range) {
for (int x = 0; x < ibuf->x; x++) {
uchar r = *p++;
uchar g = *p++;
uchar b = *p++;
uchar a = *p++;
if (r >= limit || g >= limit || b >= limit) {
if (((x + y) & 0x08) != 0) {
r = 255 - r;
g = 255 - g;
b = 255 - b;
/* Any color channel at or above the threshold marks the pixel. */
if (r >= limit || g >= limit || b >= limit) {
/* Bit 3 of (x + y) flips every 8 steps along a diagonal, giving the stripe pattern. */
if (((x + y) & 0x08) != 0) {
r = 255 - r;
g = 255 - g;
b = 255 - b;
}
}
/* Alpha is written back untouched. */
*o++ = r;
*o++ = g;
*o++ = b;
*o++ = a;
}
*o++ = r;
*o++ = g;
*o++ = b;
*o++ = a;
}
}
});
}
/* Writes a zebra-striped 8-bit copy of `src`'s float buffer into `ibuf`'s byte buffer.
 *
 * `perc` is divided by 100 to get the threshold `limit`. For every pixel whose red, green or
 * blue value is at or above that threshold, those three channels are negated wherever bit 3 of
 * `x + y` is set. Each pixel is then converted to four bytes; alpha is not modified before the
 * conversion.
 *
 * The source is read as 4 floats per pixel and the destination written as 4 bytes per pixel,
 * using `ibuf->x` pixels per row and `ibuf->y` rows.
 * NOTE(review): `src` is indexed with `ibuf`'s dimensions, so it is assumed to have the same
 * size as `ibuf` -- confirm against the caller.
 *
 * NOTE(review): this listing appears to interleave the removed single-threaded loop with the
 * added `threading::parallel_for` version (the diff markers are missing), so several statements
 * occur twice and the braces do not balance as shown. */
static void draw_zebra_float(ImBuf *src, ImBuf *ibuf, float perc)
{
#ifdef DEBUG_TIME
SCOPED_TIMER_AVERAGED(__func__);
#endif
using namespace blender;
/* Percentage expressed as a fraction, compared directly against the float channels. */
float limit = perc / 100.0f;
const float *p = src->float_buffer.data;
uchar *o = ibuf->byte_buffer.data;
int x;
int y;
for (y = 0; y < ibuf->y; y++) {
for (x = 0; x < ibuf->x; x++) {
float r = *p++;
float g = *p++;
float b = *p++;
float a = *p++;
if (r >= limit || g >= limit || b >= limit) {
if (((x + y) & 0x08) != 0) {
r = -r;
g = -g;
b = -b;
/* Rows are processed in ranges; 16 is presumably the grain size -- TODO confirm. */
threading::parallel_for(IndexRange(ibuf->y), 16, [&](IndexRange y_range) {
/* Start both cursors at the first row of this range (4 components per pixel). */
const float *p = src->float_buffer.data + y_range.first() * ibuf->x * 4;
uchar *o = ibuf->byte_buffer.data + y_range.first() * ibuf->x * 4;
for (const int y : y_range) {
for (int x = 0; x < ibuf->x; x++) {
float pix[4];
pix[0] = *p++;
pix[1] = *p++;
pix[2] = *p++;
pix[3] = *p++;
/* Any color channel at or above the threshold marks the pixel. */
if (pix[0] >= limit || pix[1] >= limit || pix[2] >= limit) {
/* Bit 3 of (x + y) flips every 8 steps along a diagonal, giving the stripe pattern. */
if (((x + y) & 0x08) != 0) {
/* Negated, not inverted; presumably the byte conversion below clamps these to 0
 * -- TODO confirm. */
pix[0] = -pix[0];
pix[1] = -pix[1];
pix[2] = -pix[2];
}
}
/* Converts all four components in one call, then advances by one output pixel. */
rgba_float_to_uchar(o, pix);
o += 4;
}
*o++ = unit_float_to_uchar_clamp(r);
*o++ = unit_float_to_uchar_clamp(g);
*o++ = unit_float_to_uchar_clamp(b);
*o++ = unit_float_to_uchar_clamp(a);
}
}
});
}
ImBuf *make_zebra_view_from_ibuf(ImBuf *ibuf, float perc)