Files
test/source/blender/compositor/intern/COM_WorkScheduler.cc
Campbell Barton e955c94ed3 License Headers: Set copyright to "Blender Authors", add AUTHORS
Listing the "Blender Foundation" as copyright holder implied the Blender
Foundation holds copyright to files which may include work from many
developers.

While keeping copyright on headers makes sense for isolated libraries,
Blender's own code may be refactored or moved between files in a way
that makes the per file copyright holders less meaningful.

Copyright references to the "Blender Foundation" have been replaced with
"Blender Authors", with the exception of `./extern/` since these this
contains libraries which are more isolated, any changed to license
headers there can be handled on a case-by-case basis.

Some directories in `./intern/` have also been excluded:

- `./intern/cycles/` it's own `AUTHORS` file is planned.
- `./intern/opensubdiv/`.

An "AUTHORS" file has been added, using the chromium projects authors
file as a template.

Design task: #110784

Ref !110783.
2023-08-16 00:20:26 +10:00

578 lines
16 KiB
C++

/* SPDX-FileCopyrightText: 2011 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "COM_WorkScheduler.h"
#include "COM_CPUDevice.h"
#include "COM_CompositorContext.h"
#include "COM_ExecutionGroup.h"
#include "COM_OpenCLDevice.h"
#include "COM_OpenCLKernels.cl.h"
#include "COM_WriteBufferOperation.h"
#include "clew.h"
#include "MEM_guardedalloc.h"
#include "BLI_task.h"
#include "BLI_threads.h"
#include "BLI_vector.hh"
#include "BKE_global.h"
namespace blender::compositor {
enum class ThreadingModel {
/** Everything is executed in the caller thread. easy for debugging. */
SingleThreaded,
/** Multi-threaded model, which uses the BLI_thread_queue pattern. */
Queue,
/** Uses BLI_task as threading backend. */
Task
};
/**
* Returns the active threading model.
*
* Default is `ThreadingModel::Queue`.
*/
constexpr ThreadingModel COM_threading_model()
{
return ThreadingModel::Queue;
}
/**
* Does the active threading model support opencl?
*/
constexpr bool COM_is_opencl_enabled()
{
return COM_threading_model() != ThreadingModel::SingleThreaded;
}
static ThreadLocal(CPUDevice *) g_thread_device;
static struct {
struct {
/** \brief list of all CPUDevices. for every hardware thread an instance of CPUDevice is
* created
*/
Vector<CPUDevice> devices;
/** \brief list of all thread for every CPUDevice in cpudevices a thread exists. */
ListBase threads;
bool initialized = false;
/** \brief all scheduled work for the cpu */
ThreadQueue *queue;
} queue;
struct {
TaskPool *pool;
} task;
struct {
ThreadQueue *queue;
cl_context context;
cl_program program;
/** \brief list of all OpenCLDevices. for every OpenCL GPU device an instance of OpenCLDevice
* is created. */
Vector<OpenCLDevice> devices;
/** \brief list of all thread for every GPUDevice in cpudevices a thread exists. */
ListBase threads;
/** \brief all scheduled work for the GPU. */
bool active = false;
bool initialized = false;
} opencl;
int num_cpu_threads;
} g_work_scheduler;
/* -------------------------------------------------------------------- */
/** \name OpenCL Scheduling
* \{ */
static void CL_CALLBACK cl_context_error(const char *errinfo,
const void * /*private_info*/,
size_t /*cb*/,
void * /*user_data*/)
{
printf("OPENCL error: %s\n", errinfo);
}
static void *thread_execute_gpu(void *data)
{
Device *device = (Device *)data;
WorkPackage *work;
while ((work = (WorkPackage *)BLI_thread_queue_pop(g_work_scheduler.opencl.queue))) {
device->execute(work);
}
return nullptr;
}
static void opencl_start(const CompositorContext &context)
{
if (context.get_has_active_opencl_devices()) {
g_work_scheduler.opencl.queue = BLI_thread_queue_init();
BLI_threadpool_init(&g_work_scheduler.opencl.threads,
thread_execute_gpu,
g_work_scheduler.opencl.devices.size());
for (Device &device : g_work_scheduler.opencl.devices) {
BLI_threadpool_insert(&g_work_scheduler.opencl.threads, &device);
}
g_work_scheduler.opencl.active = true;
}
else {
g_work_scheduler.opencl.active = false;
}
}
static bool opencl_schedule(WorkPackage *package)
{
if (package->type == eWorkPackageType::Tile && package->execution_group->get_flags().open_cl &&
g_work_scheduler.opencl.active)
{
BLI_thread_queue_push(g_work_scheduler.opencl.queue, package);
return true;
}
return false;
}
static void opencl_finish()
{
if (g_work_scheduler.opencl.active) {
BLI_thread_queue_wait_finish(g_work_scheduler.opencl.queue);
}
}
static void opencl_stop()
{
if (g_work_scheduler.opencl.active) {
BLI_thread_queue_nowait(g_work_scheduler.opencl.queue);
BLI_threadpool_end(&g_work_scheduler.opencl.threads);
BLI_thread_queue_free(g_work_scheduler.opencl.queue);
g_work_scheduler.opencl.queue = nullptr;
}
}
static bool opencl_has_gpu_devices()
{
return !g_work_scheduler.opencl.devices.is_empty();
}
static void opencl_initialize(const bool use_opencl)
{
/* deinitialize OpenCL GPU's */
if (use_opencl && !g_work_scheduler.opencl.initialized) {
g_work_scheduler.opencl.context = nullptr;
g_work_scheduler.opencl.program = nullptr;
/* This will check for errors and skip if already initialized. */
if (clewInit() != CLEW_SUCCESS) {
return;
}
if (clCreateContextFromType) {
cl_uint number_of_platforms = 0;
cl_int error;
error = clGetPlatformIDs(0, nullptr, &number_of_platforms);
if (error == -1001) {
} /* GPU not supported */
else if (error != CL_SUCCESS) {
printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
}
if (G.f & G_DEBUG) {
printf("%u number of platforms\n", number_of_platforms);
}
cl_platform_id *platforms = (cl_platform_id *)MEM_mallocN(
sizeof(cl_platform_id) * number_of_platforms, __func__);
error = clGetPlatformIDs(number_of_platforms, platforms, nullptr);
uint index_platform;
for (index_platform = 0; index_platform < number_of_platforms; index_platform++) {
cl_platform_id platform = platforms[index_platform];
cl_uint number_of_devices = 0;
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, nullptr, &number_of_devices);
if (number_of_devices <= 0) {
continue;
}
cl_device_id *cldevices = (cl_device_id *)MEM_mallocN(
sizeof(cl_device_id) * number_of_devices, __func__);
clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, number_of_devices, cldevices, nullptr);
g_work_scheduler.opencl.context = clCreateContext(
nullptr, number_of_devices, cldevices, cl_context_error, nullptr, &error);
if (error != CL_SUCCESS) {
printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
}
const char *cl_str[2] = {datatoc_COM_OpenCLKernels_cl, nullptr};
g_work_scheduler.opencl.program = clCreateProgramWithSource(
g_work_scheduler.opencl.context, 1, cl_str, nullptr, &error);
error = clBuildProgram(g_work_scheduler.opencl.program,
number_of_devices,
cldevices,
nullptr,
nullptr,
nullptr);
if (error != CL_SUCCESS) {
cl_int error2;
size_t ret_val_size = 0;
printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
error2 = clGetProgramBuildInfo(g_work_scheduler.opencl.program,
cldevices[0],
CL_PROGRAM_BUILD_LOG,
0,
nullptr,
&ret_val_size);
if (error2 != CL_SUCCESS) {
printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
}
char *build_log = (char *)MEM_mallocN(sizeof(char) * ret_val_size + 1, __func__);
error2 = clGetProgramBuildInfo(g_work_scheduler.opencl.program,
cldevices[0],
CL_PROGRAM_BUILD_LOG,
ret_val_size,
build_log,
nullptr);
if (error2 != CL_SUCCESS) {
printf("CLERROR[%d]: %s\n", error, clewErrorString(error));
}
build_log[ret_val_size] = '\0';
printf("%s", build_log);
MEM_freeN(build_log);
}
else {
uint index_devices;
for (index_devices = 0; index_devices < number_of_devices; index_devices++) {
cl_device_id device = cldevices[index_devices];
cl_int vendorID = 0;
cl_int error2 = clGetDeviceInfo(
device, CL_DEVICE_VENDOR_ID, sizeof(cl_int), &vendorID, nullptr);
if (error2 != CL_SUCCESS) {
printf("CLERROR[%d]: %s\n", error2, clewErrorString(error2));
}
g_work_scheduler.opencl.devices.append_as(g_work_scheduler.opencl.context,
device,
g_work_scheduler.opencl.program,
vendorID);
}
}
MEM_freeN(cldevices);
}
MEM_freeN(platforms);
}
g_work_scheduler.opencl.initialized = true;
}
}
static void opencl_deinitialize()
{
g_work_scheduler.opencl.devices.clear_and_shrink();
if (g_work_scheduler.opencl.program) {
clReleaseProgram(g_work_scheduler.opencl.program);
g_work_scheduler.opencl.program = nullptr;
}
if (g_work_scheduler.opencl.context) {
clReleaseContext(g_work_scheduler.opencl.context);
g_work_scheduler.opencl.context = nullptr;
}
g_work_scheduler.opencl.initialized = false;
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Single threaded Scheduling
* \{ */
static void threading_model_single_thread_execute(WorkPackage *package)
{
CPUDevice device(0);
device.execute(package);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Queue Scheduling
* \{ */
static void *threading_model_queue_execute(void *data)
{
CPUDevice *device = (CPUDevice *)data;
WorkPackage *work;
BLI_thread_local_set(g_thread_device, device);
while ((work = (WorkPackage *)BLI_thread_queue_pop(g_work_scheduler.queue.queue))) {
device->execute(work);
}
return nullptr;
}
static void threading_model_queue_schedule(WorkPackage *package)
{
BLI_thread_queue_push(g_work_scheduler.queue.queue, package);
}
static void threading_model_queue_start()
{
g_work_scheduler.queue.queue = BLI_thread_queue_init();
BLI_threadpool_init(&g_work_scheduler.queue.threads,
threading_model_queue_execute,
g_work_scheduler.queue.devices.size());
for (Device &device : g_work_scheduler.queue.devices) {
BLI_threadpool_insert(&g_work_scheduler.queue.threads, &device);
}
}
static void threading_model_queue_finish()
{
BLI_thread_queue_wait_finish(g_work_scheduler.queue.queue);
}
static void threading_model_queue_stop()
{
BLI_thread_queue_nowait(g_work_scheduler.queue.queue);
BLI_threadpool_end(&g_work_scheduler.queue.threads);
BLI_thread_queue_free(g_work_scheduler.queue.queue);
g_work_scheduler.queue.queue = nullptr;
}
static void threading_model_queue_initialize(const int num_cpu_threads)
{
/* Reinitialize if number of threads doesn't match. */
if (g_work_scheduler.queue.devices.size() != num_cpu_threads) {
g_work_scheduler.queue.devices.clear();
if (g_work_scheduler.queue.initialized) {
BLI_thread_local_delete(g_thread_device);
g_work_scheduler.queue.initialized = false;
}
}
/* Initialize CPU threads. */
if (!g_work_scheduler.queue.initialized) {
for (int index = 0; index < num_cpu_threads; index++) {
g_work_scheduler.queue.devices.append_as(index);
}
BLI_thread_local_create(g_thread_device);
g_work_scheduler.queue.initialized = true;
}
}
static void threading_model_queue_deinitialize()
{
/* deinitialize CPU threads */
if (g_work_scheduler.queue.initialized) {
g_work_scheduler.queue.devices.clear_and_shrink();
BLI_thread_local_delete(g_thread_device);
g_work_scheduler.queue.initialized = false;
}
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Task Scheduling
* \{ */
static void threading_model_task_execute(TaskPool *__restrict /*pool*/, void *task_data)
{
WorkPackage *package = static_cast<WorkPackage *>(task_data);
CPUDevice device(BLI_task_parallel_thread_id(nullptr));
BLI_thread_local_set(g_thread_device, &device);
device.execute(package);
}
static void threading_model_task_schedule(WorkPackage *package)
{
BLI_task_pool_push(
g_work_scheduler.task.pool, threading_model_task_execute, package, false, nullptr);
}
static void threading_model_task_start()
{
BLI_thread_local_create(g_thread_device);
g_work_scheduler.task.pool = BLI_task_pool_create(nullptr, TASK_PRIORITY_HIGH);
}
static void threading_model_task_finish()
{
BLI_task_pool_work_and_wait(g_work_scheduler.task.pool);
}
static void threading_model_task_stop()
{
BLI_task_pool_free(g_work_scheduler.task.pool);
g_work_scheduler.task.pool = nullptr;
BLI_thread_local_delete(g_thread_device);
}
/** \} */
/* -------------------------------------------------------------------- */
/** \name Public API
* \{ */
void WorkScheduler::schedule(WorkPackage *package)
{
if (COM_is_opencl_enabled()) {
if (opencl_schedule(package)) {
return;
}
}
switch (COM_threading_model()) {
case ThreadingModel::SingleThreaded: {
threading_model_single_thread_execute(package);
break;
}
case ThreadingModel::Queue: {
threading_model_queue_schedule(package);
break;
}
case ThreadingModel::Task: {
threading_model_task_schedule(package);
break;
}
}
}
void WorkScheduler::start(const CompositorContext &context)
{
if (COM_is_opencl_enabled()) {
opencl_start(context);
}
switch (COM_threading_model()) {
case ThreadingModel::SingleThreaded:
/* Nothing to do. */
break;
case ThreadingModel::Queue:
threading_model_queue_start();
break;
case ThreadingModel::Task:
threading_model_task_start();
break;
}
}
void WorkScheduler::finish()
{
if (COM_is_opencl_enabled()) {
opencl_finish();
}
switch (COM_threading_model()) {
case ThreadingModel::SingleThreaded:
/* Nothing to do. */
break;
case ThreadingModel::Queue:
threading_model_queue_finish();
break;
case ThreadingModel::Task:
threading_model_task_finish();
break;
}
}
void WorkScheduler::stop()
{
if (COM_is_opencl_enabled()) {
opencl_stop();
}
switch (COM_threading_model()) {
case ThreadingModel::SingleThreaded:
/* Nothing to do. */
break;
case ThreadingModel::Queue:
threading_model_queue_stop();
break;
case ThreadingModel::Task:
threading_model_task_stop();
break;
}
}
bool WorkScheduler::has_gpu_devices()
{
if (COM_is_opencl_enabled()) {
return opencl_has_gpu_devices();
}
return false;
}
void WorkScheduler::initialize(bool use_opencl, int num_cpu_threads)
{
if (COM_is_opencl_enabled()) {
opencl_initialize(use_opencl);
}
g_work_scheduler.num_cpu_threads = num_cpu_threads;
switch (COM_threading_model()) {
case ThreadingModel::SingleThreaded:
g_work_scheduler.num_cpu_threads = 1;
/* Nothing to do. */
break;
case ThreadingModel::Queue:
threading_model_queue_initialize(num_cpu_threads);
break;
case ThreadingModel::Task:
/* Nothing to do. */
break;
}
}
void WorkScheduler::deinitialize()
{
if (COM_is_opencl_enabled()) {
opencl_deinitialize();
}
switch (COM_threading_model()) {
case ThreadingModel::SingleThreaded:
/* Nothing to do. */
break;
case ThreadingModel::Queue:
threading_model_queue_deinitialize();
break;
case ThreadingModel::Task:
/* Nothing to do. */
break;
}
}
int WorkScheduler::get_num_cpu_threads()
{
return g_work_scheduler.num_cpu_threads;
}
int WorkScheduler::current_thread_id()
{
if (COM_threading_model() == ThreadingModel::SingleThreaded) {
return 0;
}
CPUDevice *device = (CPUDevice *)BLI_thread_local_get(g_thread_device);
return device->thread_id();
}
/** \} */
} // namespace blender::compositor