2023-06-14 16:52:36 +10:00
|
|
|
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
|
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
2021-10-24 14:19:19 +02:00
|
|
|
#include "device/kernel.h"
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
|
2021-10-24 14:19:19 +02:00
|
|
|
#include "device/graphics_interop.h"
|
2022-01-04 21:39:54 +01:00
|
|
|
#include "util/debug.h"
|
2021-10-24 14:19:19 +02:00
|
|
|
#include "util/log.h"
|
|
|
|
|
#include "util/map.h"
|
|
|
|
|
#include "util/string.h"
|
|
|
|
|
#include "util/unique_ptr.h"
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
|
|
class Device;
|
|
|
|
|
class device_memory;
|
|
|
|
|
|
|
|
|
|
struct KernelWorkTile;
|
|
|
|
|
|
2021-11-29 14:49:53 +00:00
|
|
|
/* Container for device kernel arguments with type correctness ensured by API. */
|
|
|
|
|
struct DeviceKernelArguments {
|
|
|
|
|
|
|
|
|
|
enum Type {
|
|
|
|
|
POINTER,
|
|
|
|
|
INT32,
|
|
|
|
|
FLOAT32,
|
|
|
|
|
BOOLEAN,
|
|
|
|
|
KERNEL_FILM_CONVERT,
|
|
|
|
|
};
|
|
|
|
|
|
2022-01-04 21:39:54 +01:00
|
|
|
static const int MAX_ARGS = 18;
|
2021-11-29 14:49:53 +00:00
|
|
|
Type types[MAX_ARGS];
|
|
|
|
|
void *values[MAX_ARGS];
|
|
|
|
|
size_t sizes[MAX_ARGS];
|
|
|
|
|
size_t count = 0;
|
|
|
|
|
|
2023-03-29 16:50:54 +02:00
|
|
|
DeviceKernelArguments() {}
|
2021-11-29 14:49:53 +00:00
|
|
|
|
|
|
|
|
template<class T> DeviceKernelArguments(const T *arg)
|
|
|
|
|
{
|
|
|
|
|
add(arg);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<class T, class... Args> DeviceKernelArguments(const T *first, Args... args)
|
|
|
|
|
{
|
|
|
|
|
add(first);
|
|
|
|
|
add(args...);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void add(const KernelFilmConvert *value)
|
|
|
|
|
{
|
|
|
|
|
add(KERNEL_FILM_CONVERT, value, sizeof(KernelFilmConvert));
|
|
|
|
|
}
|
|
|
|
|
void add(const device_ptr *value)
|
|
|
|
|
{
|
|
|
|
|
add(POINTER, value, sizeof(device_ptr));
|
|
|
|
|
}
|
|
|
|
|
void add(const int32_t *value)
|
|
|
|
|
{
|
|
|
|
|
add(INT32, value, sizeof(int32_t));
|
|
|
|
|
}
|
|
|
|
|
void add(const float *value)
|
|
|
|
|
{
|
|
|
|
|
add(FLOAT32, value, sizeof(float));
|
|
|
|
|
}
|
|
|
|
|
void add(const bool *value)
|
|
|
|
|
{
|
|
|
|
|
add(BOOLEAN, value, 4);
|
|
|
|
|
}
|
|
|
|
|
void add(const Type type, const void *value, size_t size)
|
|
|
|
|
{
|
2022-01-04 21:39:54 +01:00
|
|
|
assert(count < MAX_ARGS);
|
|
|
|
|
|
2021-11-29 14:49:53 +00:00
|
|
|
types[count] = type;
|
|
|
|
|
values[count] = (void *)value;
|
|
|
|
|
sizes[count] = size;
|
|
|
|
|
count++;
|
|
|
|
|
}
|
|
|
|
|
template<typename T, typename... Args> void add(const T *first, Args... args)
|
|
|
|
|
{
|
|
|
|
|
add(first);
|
|
|
|
|
add(args...);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
/* Abstraction of a command queue for a device.
|
|
|
|
|
* Provides API to schedule kernel execution in a specific queue with minimal possible overhead
|
|
|
|
|
* from driver side.
|
|
|
|
|
*
|
|
|
|
|
* This class encapsulates all properties needed for commands execution. */
|
|
|
|
|
class DeviceQueue {
|
|
|
|
|
public:
|
|
|
|
|
virtual ~DeviceQueue();
|
|
|
|
|
|
|
|
|
|
/* Number of concurrent states to process for integrator,
|
|
|
|
|
* based on number of cores and/or available memory. */
|
|
|
|
|
virtual int num_concurrent_states(const size_t state_size) const = 0;
|
|
|
|
|
|
2022-03-07 17:25:50 +01:00
|
|
|
/* Number of states which keeps the device occupied with work without losing performance.
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
* The renderer will add more work (when available) when number of active paths falls below this
|
|
|
|
|
* value. */
|
2022-10-24 10:23:56 +01:00
|
|
|
virtual int num_concurrent_busy_states(const size_t state_size) const = 0;
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
|
2022-07-14 16:42:43 +02:00
|
|
|
/* Number of elements in a partition of sorted shaders, that improves memory locality of
|
|
|
|
|
* integrator state fetch at the cost of decreased coherence for shader kernel execution. */
|
|
|
|
|
virtual int num_sort_partition_elements() const
|
2022-07-13 20:56:57 +01:00
|
|
|
{
|
2022-07-14 16:42:43 +02:00
|
|
|
return 65536;
|
2022-07-13 20:56:57 +01:00
|
|
|
}
|
|
|
|
|
|
2023-02-06 11:16:02 +00:00
|
|
|
/* Does device support local atomic sorting kernels (INTEGRATOR_SORT_BUCKET_PASS and
|
|
|
|
|
* INTEGRATOR_SORT_WRITE_PASS)? */
|
|
|
|
|
virtual bool supports_local_atomic_sort() const
|
|
|
|
|
{
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
/* Initialize execution of kernels on this queue.
|
|
|
|
|
*
|
|
|
|
|
* Will, for example, load all data required by the kernels from Device to global or path state.
|
|
|
|
|
*
|
|
|
|
|
* Use this method after device synchronization has finished before enqueueing any kernels. */
|
|
|
|
|
virtual void init_execution() = 0;
|
|
|
|
|
|
|
|
|
|
/* Enqueue kernel execution.
|
|
|
|
|
*
|
|
|
|
|
* Execute the kernel work_size times on the device.
|
|
|
|
|
* Supported arguments types:
|
|
|
|
|
* - int: pass pointer to the int
|
|
|
|
|
* - device memory: pass pointer to device_memory.device_pointer
|
|
|
|
|
* Return false if there was an error executing this or a previous kernel. */
|
2021-11-29 14:49:53 +00:00
|
|
|
virtual bool enqueue(DeviceKernel kernel,
|
|
|
|
|
const int work_size,
|
|
|
|
|
DeviceKernelArguments const &args) = 0;
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
|
|
|
|
|
/* Wait unit all enqueued kernels have finished execution.
|
|
|
|
|
* Return false if there was an error executing any of the enqueued kernels. */
|
|
|
|
|
virtual bool synchronize() = 0;
|
|
|
|
|
|
|
|
|
|
/* Copy memory to/from device as part of the command queue, to ensure
|
|
|
|
|
* operations are done in order without having to synchronize. */
|
|
|
|
|
virtual void zero_to_device(device_memory &mem) = 0;
|
|
|
|
|
virtual void copy_to_device(device_memory &mem) = 0;
|
|
|
|
|
virtual void copy_from_device(device_memory &mem) = 0;
|
|
|
|
|
|
|
|
|
|
/* Graphics resources interoperability.
|
|
|
|
|
*
|
|
|
|
|
* The interoperability comes here by the meaning that the device is capable of computing result
|
|
|
|
|
* directly into an OpenGL (or other graphics library) buffer. */
|
|
|
|
|
|
|
|
|
|
/* Create graphics interoperability context which will be taking care of mapping graphics
|
|
|
|
|
* resource as a buffer writable by kernels of this device. */
|
|
|
|
|
virtual unique_ptr<DeviceGraphicsInterop> graphics_interop_create()
|
|
|
|
|
{
|
|
|
|
|
LOG(FATAL) << "Request of GPU interop of a device which does not support it.";
|
|
|
|
|
return nullptr;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Device this queue has been created for. */
|
|
|
|
|
Device *device;
|
|
|
|
|
|
|
|
|
|
protected:
|
|
|
|
|
/* Hide construction so that allocation via `Device` API is enforced. */
|
|
|
|
|
explicit DeviceQueue(Device *device);
|
|
|
|
|
|
|
|
|
|
/* Implementations call these from the corresponding methods to generate debugging logs. */
|
|
|
|
|
void debug_init_execution();
|
2022-09-14 15:55:56 +02:00
|
|
|
void debug_enqueue_begin(DeviceKernel kernel, const int work_size);
|
|
|
|
|
void debug_enqueue_end();
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
void debug_synchronize();
|
2021-09-27 14:47:51 +02:00
|
|
|
string debug_active_kernels();
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
|
|
|
|
|
/* Combination of kernels enqueued together sync last synchronize. */
|
|
|
|
|
DeviceKernelMask last_kernels_enqueued_;
|
|
|
|
|
/* Time of synchronize call. */
|
|
|
|
|
double last_sync_time_;
|
|
|
|
|
/* Accumulated execution time for combinations of kernels launched together. */
|
|
|
|
|
map<DeviceKernelMask, double> stats_kernel_time_;
|
2022-09-14 15:55:56 +02:00
|
|
|
/* If it is true, then a performance statistics in the debugging logs will have focus on kernels
|
2022-09-28 09:41:31 +10:00
|
|
|
* and an explicit queue synchronization will be added after each kernel execution. */
|
2022-09-14 15:55:56 +02:00
|
|
|
bool is_per_kernel_performance_;
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
CCL_NAMESPACE_END
|