Cleanup: Replace IMB_processor_apply_threaded_scanlines with parallel loops

Code can become quite a bit smaller without intermediate structs

Pull Request: https://projects.blender.org/blender/blender/pulls/135299
This commit is contained in:
Aras Pranckevicius
2025-02-28 14:43:05 +01:00
committed by Aras Pranckevicius
parent a55dbea0e7
commit 5f667fa2bc
6 changed files with 100 additions and 383 deletions

View File

@@ -13,6 +13,7 @@
#include "BLI_math_base.h"
#include "BLI_math_color.h"
#include "BLI_math_vector.h"
#include "BLI_task.hh"
#include "BKE_image.hh"
@@ -21,67 +22,32 @@
#include "BLF_api.hh"
/** State shared by every scanline task of #image_buf_fill_color_thread_do. */
struct FillColorThreadData {
/* Start of the byte pixel buffer, or null when no byte buffer is filled. */
uchar *rect;
/* Start of the float pixel buffer, or null when no float buffer is filled. */
float *rect_float;
/* Pixels per scanline; used to locate a scanline (4 elements per pixel). */
int width;
/* RGBA fill color, converted to bytes for the byte buffer. */
float color[4];
};
/**
 * Fill `height` rows of `width` RGBA pixels with a single color.
 *
 * Pixels are written back to back (4 elements each) starting at the given
 * pointers, with no padding between rows. Either buffer may be null, in which
 * case it is left alone.
 *
 * \param rect: Byte destination, or null.
 * \param rect_float: Float destination, or null.
 * \param color: Fill color; copied as-is into float pixels and converted with
 * #rgba_float_to_uchar for byte pixels.
 */
static void image_buf_fill_color_slice(
    uchar *rect, float *rect_float, int width, int height, const float color[4])
{
  /* Float destination: copy the color unchanged into every pixel. */
  if (rect_float != nullptr) {
    float *dst = rect_float;
    for (int row = 0; row < height; row++) {
      for (int col = 0; col < width; col++, dst += 4) {
        copy_v4_v4(dst, color);
      }
    }
  }

  /* Byte destination: convert the color once, then replicate it. */
  if (rect != nullptr) {
    uchar byte_color[4];
    rgba_float_to_uchar(byte_color, color);

    uchar *dst = rect;
    for (int row = 0; row < height; row++) {
      for (int col = 0; col < width; col++, dst += 4) {
        dst[0] = byte_color[0];
        dst[1] = byte_color[1];
        dst[2] = byte_color[2];
        dst[3] = byte_color[3];
      }
    }
  }
}
static void image_buf_fill_color_thread_do(void *data_v, int scanline)
{
FillColorThreadData *data = (FillColorThreadData *)data_v;
const int num_scanlines = 1;
size_t offset = size_t(scanline) * data->width * 4;
uchar *rect = (data->rect != nullptr) ? (data->rect + offset) : nullptr;
float *rect_float = (data->rect_float != nullptr) ? (data->rect_float + offset) : nullptr;
image_buf_fill_color_slice(rect, rect_float, data->width, num_scanlines, data->color);
}
void BKE_image_buf_fill_color(
uchar *rect, float *rect_float, int width, int height, const float color[4])
uchar *rect_byte, float *rect_float, int width, int height, const float color[4])
{
if (size_t(width) * height < 64 * 64) {
image_buf_fill_color_slice(rect, rect_float, width, height, color);
}
else {
FillColorThreadData data;
data.rect = rect;
data.rect_float = rect_float;
data.width = width;
copy_v4_v4(data.color, color);
IMB_processor_apply_threaded_scanlines(height, image_buf_fill_color_thread_do, &data);
}
using namespace blender;
threading::parallel_for(
IndexRange(int64_t(width) * height), 64 * 1024, [&](const IndexRange i_range) {
if (rect_float != nullptr) {
float *dst = rect_float + i_range.first() * 4;
for ([[maybe_unused]] const int64_t i : i_range) {
copy_v4_v4(dst, color);
dst += 4;
}
}
if (rect_byte != nullptr) {
uchar ccol[4];
rgba_float_to_uchar(ccol, color);
uchar *dst = rect_byte + i_range.first() * 4;
for ([[maybe_unused]] const int64_t i : i_range) {
dst[0] = ccol[0];
dst[1] = ccol[1];
dst[2] = ccol[2];
dst[3] = ccol[3];
dst += 4;
}
}
});
}
static void image_buf_fill_checker_slice(
@@ -181,34 +147,15 @@ static void image_buf_fill_checker_slice(
}
}
/** State shared by every scanline task of #image_buf_fill_checker_thread_do. */
struct FillCheckerThreadData {
/* Start of the byte pixel buffer, or null when no byte buffer is filled. */
uchar *rect;
/* Start of the float pixel buffer, or null when no float buffer is filled. */
float *rect_float;
/* Pixels per scanline; used to locate a scanline (4 elements per pixel). */
int width;
};
static void image_buf_fill_checker_thread_do(void *data_v, int scanline)
{
FillCheckerThreadData *data = (FillCheckerThreadData *)data_v;
size_t offset = size_t(scanline) * data->width * 4;
const int num_scanlines = 1;
uchar *rect = (data->rect != nullptr) ? (data->rect + offset) : nullptr;
float *rect_float = (data->rect_float != nullptr) ? (data->rect_float + offset) : nullptr;
image_buf_fill_checker_slice(rect, rect_float, data->width, num_scanlines, scanline);
}
void BKE_image_buf_fill_checker(uchar *rect, float *rect_float, int width, int height)
{
if (size_t(width) * height < 64 * 64) {
image_buf_fill_checker_slice(rect, rect_float, width, height, 0);
}
else {
FillCheckerThreadData data;
data.rect = rect;
data.rect_float = rect_float;
data.width = width;
IMB_processor_apply_threaded_scanlines(height, image_buf_fill_checker_thread_do, &data);
}
using namespace blender;
threading::parallel_for(IndexRange(height), 64, [&](const IndexRange y_range) {
int64_t offset = y_range.first() * width * 4;
uchar *dst_byte = (rect != nullptr) ? (rect + offset) : nullptr;
float *dst_float = (rect_float != nullptr) ? (rect_float + offset) : nullptr;
image_buf_fill_checker_slice(dst_byte, dst_float, width, y_range.size(), y_range.first());
});
}
/* Utility functions for BKE_image_buf_fill_checker_color */
@@ -425,36 +372,16 @@ static void checker_board_color_prepare_slice(
checker_board_grid_fill(rect, rect_float, width, height, 1.0f / 4.0f, offset);
}
/** State shared by every scanline task of #checker_board_color_prepare_thread_do. */
struct FillCheckerColorThreadData {
/* Start of the byte pixel buffer, or null when no byte buffer is filled. */
uchar *rect;
/* Start of the float pixel buffer, or null when no float buffer is filled. */
float *rect_float;
/* `width` locates a scanline (4 elements per pixel); `height` is forwarded
 * unchanged as the last argument of #checker_board_color_prepare_slice. */
int width, height;
};
static void checker_board_color_prepare_thread_do(void *data_v, int scanline)
{
FillCheckerColorThreadData *data = (FillCheckerColorThreadData *)data_v;
const int num_scanlines = 1;
size_t offset = size_t(data->width) * scanline * 4;
uchar *rect = (data->rect != nullptr) ? (data->rect + offset) : nullptr;
float *rect_float = (data->rect_float != nullptr) ? (data->rect_float + offset) : nullptr;
checker_board_color_prepare_slice(
rect, rect_float, data->width, num_scanlines, scanline, data->height);
}
void BKE_image_buf_fill_checker_color(uchar *rect, float *rect_float, int width, int height)
{
if (size_t(width) * height < 64 * 64) {
checker_board_color_prepare_slice(rect, rect_float, width, height, 0, height);
}
else {
FillCheckerColorThreadData data;
data.rect = rect;
data.rect_float = rect_float;
data.width = width;
data.height = height;
IMB_processor_apply_threaded_scanlines(height, checker_board_color_prepare_thread_do, &data);
}
using namespace blender;
threading::parallel_for(IndexRange(height), 64, [&](const IndexRange y_range) {
int64_t offset = y_range.first() * width * 4;
uchar *dst_byte = (rect != nullptr) ? (rect + offset) : nullptr;
float *dst_float = (rect_float != nullptr) ? (rect_float + offset) : nullptr;
checker_board_color_prepare_slice(
dst_byte, dst_float, width, y_range.size(), y_range.first(), height);
});
checker_board_text(rect, rect_float, width, height, 128, 2);

View File

@@ -575,14 +575,6 @@ void imb_freerectImbuf_all(ImBuf *ibuf);
* The ibuf can be nullptr, in which case the function does nothing. */
void IMB_free_gpu_textures(ImBuf *ibuf);
/**
 * Threaded processors.
 *
 * #ScanlineThreadFunc is the per-scanline callback type: it receives the
 * caller-supplied `custom_data` pointer and a scanline index.
 *
 * #IMB_processor_apply_threaded_scanlines takes the number of scanlines to
 * process, the callback, and the opaque pointer handed back to the callback.
 * NOTE(review): callers pass the same `custom_data` to every scanline, so the
 * callback presumably must be safe to run concurrently - confirm against the
 * implementation.
 */
using ScanlineThreadFunc = void (*)(void *custom_data, int scanline);
void IMB_processor_apply_threaded_scanlines(int total_scanlines,
ScanlineThreadFunc do_thread,
void *custom_data);
/**
* \brief Transform modes to use for IMB_transform function.
*

View File

@@ -34,7 +34,6 @@
#include "BLI_path_utils.hh"
#include "BLI_rect.h"
#include "BLI_string.h"
#include "BLI_task.h"
#include "BLI_task.hh"
#include "BLI_threads.h"
@@ -2341,43 +2340,6 @@ void IMB_colormanagement_imbuf_to_byte_texture(uchar *out_buffer,
}
}
/** Per-row conversion parameters read by #imbuf_byte_to_float_cb. */
struct ImbufByteToFloatData {
/* When non-null, applied to each pixel's RGB; otherwise the callback falls
 * back to #srgb_to_linearrgb_v3_v3. */
OCIO_ConstCPUProcessorRcPtr *processor;
/* Pixels converted per row; also the output row pitch in pixels. */
int width;
/* Input addressing in pixels: `offset` is the first pixel of row 0,
 * `stride` the distance between consecutive input rows. */
int offset, stride;
/* Source pixels, 4 bytes each. */
const uchar *in_buffer;
/* Destination pixels, 4 floats each. */
float *out_buffer;
/* When true, RGB is multiplied by alpha before being stored. */
bool use_premultiply;
};
/**
 * Parallel-range callback: convert row `y` of a byte RGBA image to float RGBA.
 *
 * \param userdata: Pointer to the shared #ImbufByteToFloatData.
 * \param y: Row index, relative to the region described by `userdata`.
 */
static void imbuf_byte_to_float_cb(void *__restrict userdata,
                                   const int y,
                                   const TaskParallelTLS *__restrict /*tls*/)
{
  ImbufByteToFloatData *data = static_cast<ImbufByteToFloatData *>(userdata);

  /* Widen to `size_t` before multiplying: these are pixel indices, and
   * `y * stride` / `y * width` evaluated in `int` overflow for very large
   * images. Results are unchanged whenever the `int` math did not overflow. */
  const size_t in_offset = size_t(data->offset) + size_t(y) * size_t(data->stride);
  const size_t out_offset = size_t(y) * size_t(data->width);

  /* 4 components per pixel in both buffers. */
  const uchar *in = data->in_buffer + in_offset * 4;
  float *out = data->out_buffer + out_offset * 4;

  /* Convert to linear (through the processor when one is given, otherwise
   * treating the input as sRGB) and optionally premultiply by alpha. */
  for (int x = 0; x < data->width; x++, in += 4, out += 4) {
    float pixel[4];
    rgba_uchar_to_float(pixel, in);
    if (data->processor) {
      OCIO_cpuProcessorApplyRGB(data->processor, pixel);
    }
    else {
      srgb_to_linearrgb_v3_v3(pixel, pixel);
    }
    if (data->use_premultiply) {
      mul_v3_fl(pixel, pixel[3]);
    }
    copy_v4_v4(out, pixel);
  }
}
void IMB_colormanagement_imbuf_to_float_texture(float *out_buffer,
const int offset_x,
const int offset_y,
@@ -2386,6 +2348,8 @@ void IMB_colormanagement_imbuf_to_float_texture(float *out_buffer,
const ImBuf *ibuf,
const bool store_premultiplied)
{
using namespace blender;
/* Float texture are stored in scene linear color space, with premultiplied
* alpha depending on the image alpha mode. */
if (ibuf->float_buffer.data) {
@@ -2441,19 +2405,29 @@ void IMB_colormanagement_imbuf_to_float_texture(float *out_buffer,
ibuf->byte_buffer.colorspace) :
nullptr;
ImbufByteToFloatData data = {};
data.processor = processor;
data.width = width;
data.offset = offset_y * ibuf->x + offset_x;
data.stride = ibuf->x;
data.in_buffer = in_buffer;
data.out_buffer = out_buffer;
data.use_premultiply = use_premultiply;
TaskParallelSettings settings;
BLI_parallel_range_settings_defaults(&settings);
settings.use_threading = (height > 128);
BLI_task_parallel_range(0, height, &data, imbuf_byte_to_float_cb, &settings);
threading::parallel_for(IndexRange(height), 128, [&](const IndexRange y_range) {
for (const int y : y_range) {
const size_t in_offset = (offset_y + y) * ibuf->x + offset_x;
const size_t out_offset = y * width;
const uchar *in = in_buffer + in_offset * 4;
float *out = out_buffer + out_offset * 4;
for (int x = 0; x < width; x++, in += 4, out += 4) {
/* Convert to scene linear and premultiply. */
float pixel[4];
rgba_uchar_to_float(pixel, in);
if (processor) {
OCIO_cpuProcessorApplyRGB(processor, pixel);
}
else {
srgb_to_linearrgb_v3_v3(pixel, pixel);
}
if (use_premultiply) {
mul_v3_fl(pixel, pixel[3]);
}
copy_v4_v4(out, pixel);
}
}
});
}
}
@@ -3723,38 +3697,6 @@ static void partial_buffer_update_rect(ImBuf *ibuf,
}
}
/**
 * Arguments captured for #partial_buffer_update_rect_thread_do.
 * Every field except `ymin` is forwarded unchanged to
 * #partial_buffer_update_rect; see that function for their meaning.
 */
struct PartialThreadData {
ImBuf *ibuf;
uchar *display_buffer;
const float *linear_buffer;
const uchar *byte_buffer;
int display_stride;
int linear_stride;
int linear_offset_x, linear_offset_y;
ColormanageProcessor *cm_processor;
/* `ymin` is the row of scanline 0; the callback adds the scanline index to it. */
int xmin, ymin, xmax;
};
static void partial_buffer_update_rect_thread_do(void *data_v, int scanline)
{
PartialThreadData *data = (PartialThreadData *)data_v;
int ymin = data->ymin + scanline;
const int num_scanlines = 1;
partial_buffer_update_rect(data->ibuf,
data->display_buffer,
data->linear_buffer,
data->byte_buffer,
data->display_stride,
data->linear_stride,
data->linear_offset_x,
data->linear_offset_y,
data->cm_processor,
data->xmin,
ymin,
data->xmax,
ymin + num_scanlines);
}
static void imb_partial_display_buffer_update_ex(
ImBuf *ibuf,
const float *linear_buffer,
@@ -3770,6 +3712,7 @@ static void imb_partial_display_buffer_update_ex(
int ymax,
bool do_threads)
{
using namespace blender;
ColormanageCacheViewSettings cache_view_settings;
ColormanageCacheDisplaySettings cache_display_settings;
void *cache_handle = nullptr;
@@ -3827,38 +3770,23 @@ static void imb_partial_display_buffer_update_ex(
cm_processor = IMB_colormanagement_display_processor_new(view_settings, display_settings);
}
if (do_threads) {
PartialThreadData data;
data.ibuf = ibuf;
data.display_buffer = display_buffer;
data.linear_buffer = linear_buffer;
data.byte_buffer = byte_buffer;
data.display_stride = buffer_width;
data.linear_stride = stride;
data.linear_offset_x = offset_x;
data.linear_offset_y = offset_y;
data.cm_processor = cm_processor;
data.xmin = xmin;
data.ymin = ymin;
data.xmax = xmax;
IMB_processor_apply_threaded_scanlines(
ymax - ymin, partial_buffer_update_rect_thread_do, &data);
}
else {
partial_buffer_update_rect(ibuf,
display_buffer,
linear_buffer,
byte_buffer,
buffer_width,
stride,
offset_x,
offset_y,
cm_processor,
xmin,
ymin,
xmax,
ymax);
}
threading::parallel_for(IndexRange(ymin, ymax - ymin),
do_threads ? 64 : ymax - ymin,
[&](const IndexRange y_range) {
partial_buffer_update_rect(ibuf,
display_buffer,
linear_buffer,
byte_buffer,
buffer_width,
stride,
offset_x,
offset_y,
cm_processor,
xmin,
y_range.first(),
xmax,
y_range.one_after_last());
});
if (cm_processor) {
IMB_colormanagement_processor_free(cm_processor);

View File

@@ -448,36 +448,6 @@ void IMB_buffer_float_from_float(float *rect_to,
}
}
/**
 * Arguments captured for #imb_buffer_float_from_float_thread_do.
 * The buffer pointers are advanced to the requested scanline; every other
 * field is forwarded unchanged to #IMB_buffer_float_from_float.
 */
struct FloatToFloatThreadData {
/* Destination start; rows are `stride_to * 4` floats apart. */
float *rect_to;
/* Source start; rows are `stride_from * channels_from` floats apart. */
const float *rect_from;
int channels_from;
int profile_to;
int profile_from;
bool predivide;
int width;
int stride_to;
int stride_from;
};
static void imb_buffer_float_from_float_thread_do(void *data_v, int scanline)
{
const int num_scanlines = 1;
FloatToFloatThreadData *data = (FloatToFloatThreadData *)data_v;
size_t offset_from = size_t(scanline) * data->stride_from * data->channels_from;
size_t offset_to = size_t(scanline) * data->stride_to * 4;
IMB_buffer_float_from_float(data->rect_to + offset_to,
data->rect_from + offset_from,
data->channels_from,
data->profile_to,
data->profile_from,
data->predivide,
data->width,
num_scanlines,
data->stride_to,
data->stride_from);
}
void IMB_buffer_float_from_float_threaded(float *rect_to,
const float *rect_from,
int channels_from,
@@ -489,31 +459,21 @@ void IMB_buffer_float_from_float_threaded(float *rect_to,
int stride_to,
int stride_from)
{
if (size_t(width) * height < 64 * 64) {
IMB_buffer_float_from_float(rect_to,
rect_from,
using namespace blender;
threading::parallel_for(IndexRange(height), 64, [&](const IndexRange y_range) {
int64_t offset_from = y_range.first() * stride_from * channels_from;
int64_t offset_to = y_range.first() * stride_to * 4;
IMB_buffer_float_from_float(rect_to + offset_to,
rect_from + offset_from,
channels_from,
profile_to,
profile_from,
predivide,
width,
height,
y_range.size(),
stride_to,
stride_from);
}
else {
FloatToFloatThreadData data;
data.rect_to = rect_to;
data.rect_from = rect_from;
data.channels_from = channels_from;
data.profile_to = profile_to;
data.profile_from = profile_from;
data.predivide = predivide;
data.width = width;
data.stride_to = stride_to;
data.stride_from = stride_from;
IMB_processor_apply_threaded_scanlines(height, imb_buffer_float_from_float_thread_do, &data);
}
});
}
void IMB_buffer_float_from_float_mask(float *rect_to,

View File

@@ -7,14 +7,9 @@
* \ingroup imbuf
*/
#include <cstdlib>
#include "MEM_guardedalloc.h"
#include "BLI_task.h"
#include "BLI_math_vector.h"
#include "BLI_task.hh"
#include "IMB_colormanagement.hh"
#include "IMB_imbuf.hh"
#include "IMB_imbuf_types.hh"
@@ -53,38 +48,6 @@ void IMB_convert_rgba_to_abgr(ImBuf *ibuf)
}
}
/* -------------------------------------------------------------------- */
/** \name Threaded Image Processing
* \{ */
/** Pairs a scanline callback with the opaque pointer it is called with. */
struct ScanlineGlobalData {
/* Passed back to `do_thread` as its first argument. */
void *custom_data;
/* Callback invoked by #processor_apply_parallel for each scanline. */
ScanlineThreadFunc do_thread;
};
static void processor_apply_parallel(void *__restrict userdata,
const int scanline,
const TaskParallelTLS *__restrict /*tls*/)
{
ScanlineGlobalData *data = static_cast<ScanlineGlobalData *>(userdata);
data->do_thread(data->custom_data, scanline);
}
void IMB_processor_apply_threaded_scanlines(int total_scanlines,
ScanlineThreadFunc do_thread,
void *custom_data)
{
TaskParallelSettings settings;
ScanlineGlobalData data = {};
data.do_thread = do_thread;
data.custom_data = custom_data;
BLI_parallel_range_settings_defaults(&settings);
BLI_task_parallel_range(0, total_scanlines, &data, processor_apply_parallel, &settings);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Alpha-under
* \{ */

View File

@@ -14,6 +14,7 @@
#include "BLI_math_color_blend.h"
#include "BLI_math_vector.h"
#include "BLI_rect.h"
#include "BLI_task.hh"
#include "BLI_utildefines.h"
#include "IMB_imbuf.hh"
@@ -935,41 +936,6 @@ void IMB_rectblend(ImBuf *dbuf,
}
}
/**
 * Arguments captured for #rectblend_thread_do.
 * All fields are forwarded to #IMB_rectblend; the three `*y` origins have the
 * scanline index added first. See #IMB_rectblend for their meaning.
 */
struct RectBlendThreadData {
ImBuf *dbuf;
const ImBuf *obuf, *sbuf;
ushort *dmask;
const ushort *curvemask, *texmask;
float mask_max;
/* `desty` and `origy` are the rows of scanline 0 in their buffers. */
int destx, desty, origx, origy;
/* `srcy` is the row of scanline 0 in the source. */
int srcx, srcy, width;
IMB_BlendMode mode;
bool accumulate;
};
static void rectblend_thread_do(void *data_v, int scanline)
{
const int num_scanlines = 1;
RectBlendThreadData *data = (RectBlendThreadData *)data_v;
IMB_rectblend(data->dbuf,
data->obuf,
data->sbuf,
data->dmask,
data->curvemask,
data->texmask,
data->mask_max,
data->destx,
data->desty + scanline,
data->origx,
data->origy + scanline,
data->srcx,
data->srcy + scanline,
data->width,
num_scanlines,
data->mode,
data->accumulate);
}
void IMB_rectblend_threaded(ImBuf *dbuf,
const ImBuf *obuf,
const ImBuf *sbuf,
@@ -988,7 +954,8 @@ void IMB_rectblend_threaded(ImBuf *dbuf,
IMB_BlendMode mode,
bool accumulate)
{
if (size_t(width) * height < 64 * 64) {
using namespace blender;
threading::parallel_for(IndexRange(height), 16, [&](const IndexRange y_range) {
IMB_rectblend(dbuf,
obuf,
sbuf,
@@ -997,36 +964,16 @@ void IMB_rectblend_threaded(ImBuf *dbuf,
texmask,
mask_max,
destx,
desty,
desty + y_range.first(),
origx,
origy,
origy + y_range.first(),
srcx,
srcy,
srcy + y_range.first(),
width,
height,
y_range.size(),
mode,
accumulate);
}
else {
RectBlendThreadData data;
data.dbuf = dbuf;
data.obuf = obuf;
data.sbuf = sbuf;
data.dmask = dmask;
data.curvemask = curvemask;
data.texmask = texmask;
data.mask_max = mask_max;
data.destx = destx;
data.desty = desty;
data.origx = origx;
data.origy = origy;
data.srcx = srcx;
data.srcy = srcy;
data.width = width;
data.mode = mode;
data.accumulate = accumulate;
IMB_processor_apply_threaded_scanlines(height, rectblend_thread_do, &data);
}
});
}
void IMB_rectfill(ImBuf *drect, const float col[4])