Refactor: Cycles: Graphics interop changes

* Add GraphicsInteropDevice to check if interop is possible with device
* Rename GraphicsInterop to GraphicsInteropBuffer
* Include display device type and memory size in GraphicsInteropBuffer
* Unnest graphics interop class to make forward declarations possible

Pull Request: https://projects.blender.org/blender/blender/pulls/137363
This commit is contained in:
Brecht Van Lommel
2025-04-12 14:24:08 +02:00
parent 463fc8cf28
commit 4d7bd22beb
29 changed files with 428 additions and 218 deletions

View File

@@ -151,18 +151,27 @@ void OpenGLDisplayDriver::unmap_texture_buffer()
* Graphics interoperability.
*/
OpenGLDisplayDriver::GraphicsInterop OpenGLDisplayDriver::graphics_interop_get()
GraphicsInteropDevice OpenGLDisplayDriver::graphics_interop_get_device()
{
GraphicsInterop interop_dst;
GraphicsInteropDevice interop_device;
interop_device.type = GraphicsInteropDevice::OPENGL;
return interop_device;
}
interop_dst.buffer_width = texture_.buffer_width;
interop_dst.buffer_height = texture_.buffer_height;
interop_dst.opengl_pbo_id = texture_.gl_pbo_id;
GraphicsInteropBuffer OpenGLDisplayDriver::graphics_interop_get_buffer()
{
GraphicsInteropBuffer interop_buffer;
interop_dst.need_clear = texture_.need_clear;
interop_buffer.width = texture_.buffer_width;
interop_buffer.height = texture_.buffer_height;
interop_buffer.type = GraphicsInteropDevice::OPENGL;
interop_buffer.handle = texture_.gl_pbo_id;
interop_buffer.size = texture_.buffer_width * texture_.buffer_height * sizeof(half4);
interop_buffer.need_clear = texture_.need_clear;
texture_.need_clear = false;
return interop_dst;
return interop_buffer;
}
void OpenGLDisplayDriver::graphics_interop_activate()

View File

@@ -39,7 +39,8 @@ class OpenGLDisplayDriver : public DisplayDriver {
half4 *map_texture_buffer() override;
void unmap_texture_buffer() override;
GraphicsInterop graphics_interop_get() override;
GraphicsInteropDevice graphics_interop_get_device() override;
GraphicsInteropBuffer graphics_interop_get_buffer() override;
void draw(const Params &params) override;

View File

@@ -5,6 +5,7 @@
#include "GPU_context.hh"
#include "GPU_immediate.hh"
#include "GPU_platform.hh"
#include "GPU_platform_backend_enum.h"
#include "GPU_shader.hh"
#include "GPU_state.hh"
#include "GPU_texture.hh"
@@ -314,7 +315,7 @@ class DisplayGPUPixelBuffer {
bool gpu_resources_ensure(const uint new_width, const uint new_height)
{
const size_t required_size = sizeof(half4) * new_width * new_height * 4;
const size_t required_size = sizeof(half4) * new_width * new_height;
/* Try to re-use the existing PBO if it has usable size. */
if (gpu_pixel_buffer) {
@@ -620,22 +621,57 @@ void BlenderDisplayDriver::unmap_texture_buffer()
* Graphics interoperability.
*/
BlenderDisplayDriver::GraphicsInterop BlenderDisplayDriver::graphics_interop_get()
GraphicsInteropDevice BlenderDisplayDriver::graphics_interop_get_device()
{
GraphicsInterop interop_dst;
GraphicsInteropDevice interop_device;
if (GPU_backend_get_type() != GPU_BACKEND_OPENGL) {
return interop_dst;
switch (GPU_backend_get_type()) {
case GPU_BACKEND_OPENGL:
interop_device.type = GraphicsInteropDevice::OPENGL;
break;
case GPU_BACKEND_VULKAN:
case GPU_BACKEND_METAL:
case GPU_BACKEND_NONE:
case GPU_BACKEND_ANY:
/* Vulkan and Metal not supported yet by Cycles. */
interop_device.type = GraphicsInteropDevice::NONE;
break;
}
blender::Span<uint8_t> uuid = GPU_platform_uuid();
interop_device.uuid.resize(uuid.size());
std::copy_n(uuid.data(), uuid.size(), interop_device.uuid.data());
return interop_device;
}
GraphicsInteropBuffer BlenderDisplayDriver::graphics_interop_get_buffer()
{
GraphicsInteropBuffer interop_buffer;
interop_buffer.width = tiles_->current_tile.buffer_object.width;
interop_buffer.height = tiles_->current_tile.buffer_object.height;
GPUPixelBufferNativeHandle handle = GPU_pixel_buffer_get_native_handle(
tiles_->current_tile.buffer_object.gpu_pixel_buffer);
interop_dst.buffer_width = tiles_->current_tile.buffer_object.width;
interop_dst.buffer_height = tiles_->current_tile.buffer_object.height;
interop_dst.opengl_pbo_id = handle.handle;
switch (GPU_backend_get_type()) {
case GPU_BACKEND_OPENGL:
interop_buffer.type = GraphicsInteropDevice::OPENGL;
break;
case GPU_BACKEND_VULKAN:
case GPU_BACKEND_METAL:
case GPU_BACKEND_NONE:
case GPU_BACKEND_ANY:
/* Vulkan and Metal not supported yet by Cycles. */
interop_buffer.type = GraphicsInteropDevice::NONE;
break;
}
return interop_dst;
interop_buffer.handle = handle.handle;
interop_buffer.size = handle.size;
return interop_buffer;
}
void BlenderDisplayDriver::graphics_interop_activate()

View File

@@ -113,7 +113,8 @@ class BlenderDisplayDriver : public DisplayDriver {
half4 *map_texture_buffer() override;
void unmap_texture_buffer() override;
GraphicsInterop graphics_interop_get() override;
GraphicsInteropDevice graphics_interop_get_device() override;
GraphicsInteropBuffer graphics_interop_get_buffer() override;
void draw(const Params &params) override;

View File

@@ -26,6 +26,8 @@
# include "kernel/device/cuda/globals.h"
# include "session/display_driver.h"
CCL_NAMESPACE_BEGIN
class CUDADevice;
@@ -1048,14 +1050,9 @@ unique_ptr<DeviceQueue> CUDADevice::gpu_queue_create()
return make_unique<CUDADeviceQueue>(this);
}
bool CUDADevice::should_use_graphics_interop()
bool CUDADevice::should_use_graphics_interop(const GraphicsInteropDevice &interop_device,
const bool log)
{
/* Check whether this device is part of OpenGL context.
*
* Using CUDA device for graphics interoperability which is not part of the OpenGL context is
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */
if (headless) {
/* Avoid any call which might involve interaction with a graphics backend when we know that
* we don't have active graphics context. This avoids a crash on certain platforms when calling
@@ -1065,20 +1062,45 @@ bool CUDADevice::should_use_graphics_interop()
CUDAContextScope scope(this);
int num_all_devices = 0;
cuda_assert(cuDeviceGetCount(&num_all_devices));
switch (interop_device.type) {
case GraphicsInteropDevice::OPENGL: {
/* Check whether this device is part of OpenGL context.
*
* Using CUDA device for graphics interoperability which is not part of the OpenGL context is
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */
int num_all_devices = 0;
cuda_assert(cuDeviceGetCount(&num_all_devices));
if (num_all_devices == 0) {
return false;
}
if (num_all_devices == 0) {
return false;
}
vector<CUdevice> gl_devices(num_all_devices);
uint num_gl_devices = 0;
cuGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, CU_GL_DEVICE_LIST_ALL);
vector<CUdevice> gl_devices(num_all_devices);
uint num_gl_devices = 0;
cuGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, CU_GL_DEVICE_LIST_ALL);
for (uint i = 0; i < num_gl_devices; ++i) {
if (gl_devices[i] == cuDevice) {
return true;
bool found = false;
for (uint i = 0; i < num_gl_devices; ++i) {
if (gl_devices[i] == cuDevice) {
found = true;
break;
}
}
if (log) {
if (found) {
VLOG_INFO << "Graphics interop: found matching OpenGL device for CUDA";
}
else {
VLOG_INFO << "Graphics interop: no matching OpenGL device for CUDA";
}
}
return found;
}
case GraphicsInteropDevice::NONE: {
return false;
}
}

View File

@@ -96,7 +96,8 @@ class CUDADevice : public GPUDevice {
void copy_host_to_device(void *device_pointer, void *host_pointer, const size_t size) override;
void const_copy_to(const char *name, void *host, const size_t size) override;
bool should_use_graphics_interop() override;
bool should_use_graphics_interop(const GraphicsInteropDevice &interop_device,
const bool log) override;
unique_ptr<DeviceQueue> gpu_queue_create() override;

View File

@@ -19,73 +19,91 @@ CUDADeviceGraphicsInterop::CUDADeviceGraphicsInterop(CUDADeviceQueue *queue)
CUDADeviceGraphicsInterop::~CUDADeviceGraphicsInterop()
{
CUDAContextScope scope(device_);
if (cu_graphics_resource_) {
cuda_device_assert(device_, cuGraphicsUnregisterResource(cu_graphics_resource_));
}
free();
}
void CUDADeviceGraphicsInterop::set_display_interop(
const DisplayDriver::GraphicsInterop &display_interop)
void CUDADeviceGraphicsInterop::set_buffer(const GraphicsInteropBuffer &interop_buffer)
{
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
display_interop.buffer_height;
const int64_t new_buffer_area = int64_t(interop_buffer.width) * interop_buffer.height;
need_clear_ = display_interop.need_clear;
assert(interop_buffer.size >= interop_buffer.width * interop_buffer.height * sizeof(half4));
if (!display_interop.need_recreate) {
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
need_clear_ = interop_buffer.need_clear;
if (!interop_buffer.need_recreate) {
if (native_type_ == interop_buffer.type && native_handle_ == interop_buffer.handle &&
native_size_ == interop_buffer.size && buffer_area_ == new_buffer_area)
{
return;
}
}
CUDAContextScope scope(device_);
free();
if (cu_graphics_resource_) {
cuda_device_assert(device_, cuGraphicsUnregisterResource(cu_graphics_resource_));
}
const CUresult result = cuGraphicsGLRegisterBuffer(
&cu_graphics_resource_, display_interop.opengl_pbo_id, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
if (result != CUDA_SUCCESS) {
LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result);
}
opengl_pbo_id_ = display_interop.opengl_pbo_id;
native_type_ = interop_buffer.type;
native_handle_ = interop_buffer.handle;
native_size_ = interop_buffer.size;
buffer_area_ = new_buffer_area;
switch (interop_buffer.type) {
case GraphicsInteropDevice::OPENGL: {
const CUresult result = cuGraphicsGLRegisterBuffer(
&cu_graphics_resource_, interop_buffer.handle, CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE);
if (result != CUDA_SUCCESS) {
LOG(ERROR) << "Error registering OpenGL buffer: " << cuewErrorString(result);
}
break;
}
case GraphicsInteropDevice::NONE:
break;
}
}
device_ptr CUDADeviceGraphicsInterop::map()
{
if (!cu_graphics_resource_) {
return 0;
}
if (cu_graphics_resource_) {
/* OpenGL buffer needs mapping. */
CUDAContextScope scope(device_);
CUDAContextScope scope(device_);
CUdeviceptr cu_buffer;
size_t bytes;
CUdeviceptr cu_buffer;
size_t bytes;
cuda_device_assert(device_, cuGraphicsMapResources(1, &cu_graphics_resource_, queue_->stream()));
cuda_device_assert(
device_, cuGraphicsResourceGetMappedPointer(&cu_buffer, &bytes, cu_graphics_resource_));
if (need_clear_) {
cuda_device_assert(device_,
cuGraphicsMapResources(1, &cu_graphics_resource_, queue_->stream()));
cuda_device_assert(
device_, cuMemsetD8Async(static_cast<CUdeviceptr>(cu_buffer), 0, bytes, queue_->stream()));
device_, cuGraphicsResourceGetMappedPointer(&cu_buffer, &bytes, cu_graphics_resource_));
need_clear_ = false;
if (need_clear_) {
cuda_device_assert(
device_,
cuMemsetD8Async(static_cast<CUdeviceptr>(cu_buffer), 0, bytes, queue_->stream()));
need_clear_ = false;
}
return static_cast<device_ptr>(cu_buffer);
}
return static_cast<device_ptr>(cu_buffer);
return 0;
}
void CUDADeviceGraphicsInterop::unmap()
{
CUDAContextScope scope(device_);
if (cu_graphics_resource_) {
CUDAContextScope scope(device_);
cuda_device_assert(device_,
cuGraphicsUnmapResources(1, &cu_graphics_resource_, queue_->stream()));
cuda_device_assert(device_,
cuGraphicsUnmapResources(1, &cu_graphics_resource_, queue_->stream()));
}
}
void CUDADeviceGraphicsInterop::free()
{
if (cu_graphics_resource_) {
cuda_device_assert(device_, cuGraphicsUnregisterResource(cu_graphics_resource_));
cu_graphics_resource_ = nullptr;
}
}
CCL_NAMESPACE_END

View File

@@ -5,6 +5,7 @@
#ifdef WITH_CUDA
# include "device/graphics_interop.h"
# include "session/display_driver.h"
# ifdef WITH_CUDA_DYNLOAD
# include "cuew.h"
@@ -29,7 +30,7 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
CUDADeviceGraphicsInterop &operator=(const CUDADeviceGraphicsInterop &other) = delete;
CUDADeviceGraphicsInterop &operator=(CUDADeviceGraphicsInterop &&other) = delete;
void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
void set_buffer(const GraphicsInteropBuffer &interop_buffer) override;
device_ptr map() override;
void unmap() override;
@@ -38,15 +39,21 @@ class CUDADeviceGraphicsInterop : public DeviceGraphicsInterop {
CUDADeviceQueue *queue_ = nullptr;
CUDADevice *device_ = nullptr;
/* OpenGL PBO which is currently registered as the destination for the CUDA buffer. */
int64_t opengl_pbo_id_ = 0;
/* Native handle. */
GraphicsInteropDevice::Type native_type_ = GraphicsInteropDevice::NONE;
int64_t native_handle_ = 0;
size_t native_size_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;
/* The destination was requested to be cleared. */
bool need_clear_ = false;
/* CUDA resources. */
CUgraphicsResource cu_graphics_resource_ = nullptr;
void free();
};
CCL_NAMESPACE_END

View File

@@ -26,6 +26,7 @@ CCL_NAMESPACE_BEGIN
class BVH;
class DeviceQueue;
class GraphicsInteropDevice;
class Progress;
class CPUKernels;
class Scene;
@@ -244,13 +245,14 @@ class Device {
/* Graphics resources interoperability.
*
* The interoperability comes here by the meaning that the device is capable of computing result
* directly into an OpenGL (or other graphics library) buffer. */
* directly into an OpenGL, Vulkan or Metal buffer. */
/* Check display is to be updated using graphics interoperability.
* The interoperability cannot be used if it is not supported by the device. But the device
* might also force disable the interoperability if it detects that it will be slower than
* copying pixels from the render buffer. */
virtual bool should_use_graphics_interop()
virtual bool should_use_graphics_interop(const GraphicsInteropDevice & /*interop_device*/,
const bool /*log*/ = false)
{
return false;
}

View File

@@ -4,12 +4,12 @@
#pragma once
#include "session/display_driver.h"
#include "util/types.h"
CCL_NAMESPACE_BEGIN
class GraphicsInteropBuffer;
/* Device-side graphics interoperability support.
*
* Takes care of holding all the handlers needed by the device to implement interoperability with
@@ -19,9 +19,9 @@ class DeviceGraphicsInterop {
DeviceGraphicsInterop() = default;
virtual ~DeviceGraphicsInterop() = default;
/* Update this device-side graphics interoperability object with the given destination resource
* information. */
virtual void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) = 0;
/* Update this device-side graphics interoperability buffer with the given destination
* resource information. */
virtual void set_buffer(const GraphicsInteropBuffer &interop_buffer) = 0;
virtual device_ptr map() = 0;
virtual void unmap() = 0;

View File

@@ -25,6 +25,8 @@
# include "kernel/device/hip/globals.h"
# include "session/display_driver.h"
CCL_NAMESPACE_BEGIN
class HIPDevice;
@@ -1014,41 +1016,61 @@ unique_ptr<DeviceQueue> HIPDevice::gpu_queue_create()
return make_unique<HIPDeviceQueue>(this);
}
bool HIPDevice::should_use_graphics_interop()
bool HIPDevice::should_use_graphics_interop(const GraphicsInteropDevice &interop_device,
const bool log)
{
/* Check whether this device is part of OpenGL context.
*
* Using HIP device for graphics interoperability which is not part of the OpenGL context is
* possible, but from the empiric measurements it can be considerably slower than using naive
* pixels copy. */
if (headless) {
/* Avoid any call which might involve interaction with a graphics backend when we know that
* we don't have active graphics context. This avoids potential crash in the driver. */
return false;
}
/* Disable graphics interop for now, because of driver bug in 21.40. See #92972 */
# if 0
HIPContextScope scope(this);
int num_all_devices = 0;
hip_assert(hipGetDeviceCount(&num_all_devices));
switch (interop_device.type) {
case GraphicsInteropDevice::OPENGL: {
/* Disable graphics interop for now, because of driver bug in 21.40. See #92972.
* Also missing Vulkan support which is needed now. */
return false;
if (num_all_devices == 0) {
return false;
}
/* Check whether this device is part of OpenGL context.
*
* Using HIP device for graphics interoperability which is not part of the OpenGL context is
* possible, but from the empiric measurements with CUDA it can be considerably slower than
* using naive pixels copy. */
int num_all_devices = 0;
hip_assert(hipGetDeviceCount(&num_all_devices));
vector<hipDevice_t> gl_devices(num_all_devices);
uint num_gl_devices = 0;
hipGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, hipGLDeviceListAll);
if (num_all_devices == 0) {
return false;
}
for (hipDevice_t gl_device : gl_devices) {
if (gl_device == hipDevice) {
return true;
vector<hipDevice_t> gl_devices(num_all_devices);
uint num_gl_devices = 0;
hipGLGetDevices(&num_gl_devices, gl_devices.data(), num_all_devices, hipGLDeviceListAll);
bool found = false;
for (hipDevice_t gl_device : gl_devices) {
if (gl_device == hipDevice) {
found = true;
break;
}
}
if (log) {
if (found) {
VLOG_INFO << "Graphics interop: found matching OpenGL device for HIP";
}
else {
VLOG_INFO << "Graphics interop: no matching OpenGL device for HIP";
}
}
return found;
}
case GraphicsInteropDevice::NONE:
return false;
}
# endif
return false;
}

View File

@@ -96,7 +96,8 @@ class HIPDevice : public GPUDevice {
void const_copy_to(const char *name, void *host, const size_t size) override;
/* Graphics resources interoperability. */
bool should_use_graphics_interop() override;
bool should_use_graphics_interop(const GraphicsInteropDevice &interop_device,
const bool log) override;
unique_ptr<DeviceQueue> gpu_queue_create() override;

View File

@@ -19,73 +19,91 @@ HIPDeviceGraphicsInterop::HIPDeviceGraphicsInterop(HIPDeviceQueue *queue)
HIPDeviceGraphicsInterop::~HIPDeviceGraphicsInterop()
{
HIPContextScope scope(device_);
if (hip_graphics_resource_) {
hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
}
free();
}
void HIPDeviceGraphicsInterop::set_display_interop(
const DisplayDriver::GraphicsInterop &display_interop)
void HIPDeviceGraphicsInterop::set_buffer(const GraphicsInteropBuffer &interop_buffer)
{
const int64_t new_buffer_area = int64_t(display_interop.buffer_width) *
display_interop.buffer_height;
const int64_t new_buffer_area = int64_t(interop_buffer.width) * interop_buffer.height;
need_clear_ = display_interop.need_clear;
assert(interop_buffer.size >= interop_buffer.width * interop_buffer.height * sizeof(half4));
if (opengl_pbo_id_ == display_interop.opengl_pbo_id && buffer_area_ == new_buffer_area) {
return;
need_clear_ = interop_buffer.need_clear;
if (!interop_buffer.need_recreate) {
if (native_type_ == interop_buffer.type && native_handle_ == interop_buffer.handle &&
native_size_ == interop_buffer.size && buffer_area_ == new_buffer_area)
{
return;
}
}
HIPContextScope scope(device_);
free();
if (hip_graphics_resource_) {
hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
}
const hipError_t result = hipGraphicsGLRegisterBuffer(
&hip_graphics_resource_, display_interop.opengl_pbo_id, hipGraphicsRegisterFlagsNone);
if (result != hipSuccess) {
LOG(ERROR) << "Error registering OpenGL buffer: " << hipewErrorString(result);
}
opengl_pbo_id_ = display_interop.opengl_pbo_id;
native_type_ = interop_buffer.type;
native_handle_ = interop_buffer.handle;
native_size_ = interop_buffer.size;
buffer_area_ = new_buffer_area;
switch (interop_buffer.type) {
case GraphicsInteropDevice::OPENGL: {
const hipError_t result = hipGraphicsGLRegisterBuffer(
&hip_graphics_resource_, interop_buffer.handle, hipGraphicsRegisterFlagsNone);
if (result != hipSuccess) {
LOG(ERROR) << "Error registering OpenGL buffer: " << hipewErrorString(result);
}
break;
}
case GraphicsInteropDevice::NONE:
break;
}
}
device_ptr HIPDeviceGraphicsInterop::map()
{
if (!hip_graphics_resource_) {
return 0;
}
if (hip_graphics_resource_) {
HIPContextScope scope(device_);
HIPContextScope scope(device_);
hipDeviceptr_t hip_buffer;
size_t bytes;
hipDeviceptr_t hip_buffer;
size_t bytes;
hip_device_assert(device_,
hipGraphicsMapResources(1, &hip_graphics_resource_, queue_->stream()));
hip_device_assert(
device_, hipGraphicsResourceGetMappedPointer(&hip_buffer, &bytes, hip_graphics_resource_));
if (need_clear_) {
hip_device_assert(device_,
hipGraphicsMapResources(1, &hip_graphics_resource_, queue_->stream()));
hip_device_assert(
device_,
hipMemsetD8Async(static_cast<hipDeviceptr_t>(hip_buffer), 0, bytes, queue_->stream()));
device_, hipGraphicsResourceGetMappedPointer(&hip_buffer, &bytes, hip_graphics_resource_));
need_clear_ = false;
if (need_clear_) {
hip_device_assert(
device_,
hipMemsetD8Async(static_cast<hipDeviceptr_t>(hip_buffer), 0, bytes, queue_->stream()));
need_clear_ = false;
}
return static_cast<device_ptr>(hip_buffer);
}
return static_cast<device_ptr>(hip_buffer);
return 0;
}
void HIPDeviceGraphicsInterop::unmap()
{
HIPContextScope scope(device_);
if (hip_graphics_resource_) {
HIPContextScope scope(device_);
hip_device_assert(device_,
hipGraphicsUnmapResources(1, &hip_graphics_resource_, queue_->stream()));
hip_device_assert(device_,
hipGraphicsUnmapResources(1, &hip_graphics_resource_, queue_->stream()));
}
}
void HIPDeviceGraphicsInterop::free()
{
if (hip_graphics_resource_) {
hip_device_assert(device_, hipGraphicsUnregisterResource(hip_graphics_resource_));
hip_graphics_resource_ = nullptr;
}
}
CCL_NAMESPACE_END

View File

@@ -5,6 +5,7 @@
#ifdef WITH_HIP
# include "device/graphics_interop.h"
# include "session/display_driver.h"
# ifdef WITH_HIP_DYNLOAD
# include "hipew.h"
@@ -27,7 +28,7 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
HIPDeviceGraphicsInterop &operator=(const HIPDeviceGraphicsInterop &other) = delete;
HIPDeviceGraphicsInterop &operator=(HIPDeviceGraphicsInterop &&other) = delete;
void set_display_interop(const DisplayDriver::GraphicsInterop &display_interop) override;
void set_buffer(const GraphicsInteropBuffer &interop_buffer) override;
device_ptr map() override;
void unmap() override;
@@ -36,8 +37,11 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
HIPDeviceQueue *queue_ = nullptr;
HIPDevice *device_ = nullptr;
/* OpenGL PBO which is currently registered as the destination for the HIP buffer. */
int64_t opengl_pbo_id_ = 0;
/* Native handle. */
GraphicsInteropDevice::Type native_type_ = GraphicsInteropDevice::NONE;
int64_t native_handle_ = 0;
size_t native_size_ = 0;
/* Buffer area in pixels of the corresponding PBO. */
int64_t buffer_area_ = 0;
@@ -45,6 +49,8 @@ class HIPDeviceGraphicsInterop : public DeviceGraphicsInterop {
bool need_clear_ = false;
hipGraphicsResource hip_graphics_resource_ = nullptr;
void free();
};
CCL_NAMESPACE_END

View File

@@ -134,7 +134,8 @@ class MetalDevice : public Device {
void erase_allocation(device_memory &mem);
bool should_use_graphics_interop() override;
bool should_use_graphics_interop(const GraphicsInteropDevice &interop_device,
const bool log) override;
void *get_native_buffer(device_ptr ptr) override;

View File

@@ -1360,7 +1360,8 @@ unique_ptr<DeviceQueue> MetalDevice::gpu_queue_create()
return make_unique<MetalDeviceQueue>(this);
}
bool MetalDevice::should_use_graphics_interop()
bool MetalDevice::should_use_graphics_interop(const GraphicsInteropDevice & /*interop_device*/,
const bool /*log*/)
{
/* METAL_WIP - provide fast interop */
return false;

View File

@@ -928,7 +928,8 @@ unique_ptr<DeviceQueue> OneapiDevice::gpu_queue_create()
return make_unique<OneapiDeviceQueue>(this);
}
bool OneapiDevice::should_use_graphics_interop()
bool OneapiDevice::should_use_graphics_interop(const GraphicsInteropDevice & /*interop_device*/,
const bool /*log*/)
{
/* NOTE(@nsirgien): oneAPI doesn't yet support direct writing into graphics API objects, so
* return false. */

View File

@@ -107,7 +107,8 @@ class OneapiDevice : public GPUDevice {
void const_copy_to(const char *name, void *host, const size_t size) override;
/* Graphics resources interoperability. */
bool should_use_graphics_interop() override;
bool should_use_graphics_interop(const GraphicsInteropDevice &interop_device,
const bool log) override;
unique_ptr<DeviceQueue> gpu_queue_create() override;

View File

@@ -194,17 +194,27 @@ void HdCyclesDisplayDriver::unmap_texture_buffer()
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
}
DisplayDriver::GraphicsInterop HdCyclesDisplayDriver::graphics_interop_get()
GraphicsInteropDevice HdCyclesDisplayDriver::graphics_interop_get_device()
{
GraphicsInterop interop_dst;
interop_dst.buffer_width = pbo_size_.x;
interop_dst.buffer_height = pbo_size_.y;
interop_dst.opengl_pbo_id = gl_pbo_id_;
GraphicsInteropDevice interop_device;
interop_device.type = GraphicsInteropDevice::OPENGL;
return interop_device;
}
interop_dst.need_clear = need_clear_;
GraphicsInteropBuffer HdCyclesDisplayDriver::graphics_interop_get_buffer()
{
GraphicsInteropBuffer interop_buffer;
interop_buffer.width = pbo_size_.x;
interop_buffer.height = pbo_size_.y;
interop_buffer.type = GraphicsInteropDevice::OPENGL;
interop_buffer.handle = gl_pbo_id_;
interop_buffer.size = pbo_size_.x * pbo_size_.y * sizeof(half4);
interop_buffer.need_clear = need_clear_;
need_clear_ = false;
return interop_dst;
return interop_buffer;
}
void HdCyclesDisplayDriver::graphics_interop_activate()

View File

@@ -32,7 +32,8 @@ class HdCyclesDisplayDriver final : public CCL_NS::DisplayDriver {
CCL_NS::half4 *map_texture_buffer() override;
void unmap_texture_buffer() override;
GraphicsInterop graphics_interop_get() override;
GraphicsInteropDevice graphics_interop_get_device() override;
GraphicsInteropBuffer graphics_interop_get_buffer() override;
void graphics_interop_activate() override;
void graphics_interop_deactivate() override;

View File

@@ -7,6 +7,7 @@
#include "device/device.h"
#include "integrator/denoiser_oidn.h"
#include "session/display_driver.h"
#ifdef WITH_OPENIMAGEDENOISE
# include "integrator/denoiser_oidn_gpu.h"
#endif
@@ -42,7 +43,9 @@ static bool is_single_device(const Device *device)
/* Find best suitable device to perform denoiser on. Will iterate over possible sub-devices of
* multi-device. */
static Device *find_best_device(Device *device, const DenoiserType type)
static Device *find_best_device(Device *device,
const DenoiserType type,
const GraphicsInteropDevice &interop_device)
{
Device *best_device = nullptr;
@@ -61,7 +64,8 @@ static Device *find_best_device(Device *device, const DenoiserType type)
}
/* Prefer a device that can use graphics interop for faster display update. */
if (sub_device->should_use_graphics_interop() && !best_device->should_use_graphics_interop())
if (sub_device->should_use_graphics_interop(interop_device) &&
!best_device->should_use_graphics_interop(interop_device))
{
best_device = sub_device;
}
@@ -101,6 +105,7 @@ bool use_gpu_oidn_denoiser(Device *denoiser_device, const DenoiseParams &params)
DenoiseParams get_effective_denoise_params(Device *denoiser_device,
Device *cpu_fallback_device,
const DenoiseParams &params,
const GraphicsInteropDevice &interop_device,
Device *&single_denoiser_device)
{
DCHECK(params.use);
@@ -116,7 +121,7 @@ DenoiseParams get_effective_denoise_params(Device *denoiser_device,
/* Find best device from the ones which are proposed for denoising.
* The choice is expected to be between a few GPUs, or between a GPU and a CPU
* or between a few GPUs and a CPU. */
single_denoiser_device = find_best_device(denoiser_device, params.type);
single_denoiser_device = find_best_device(denoiser_device, params.type, interop_device);
}
/* Ensure that we have a device to be used later in the code below. */
if (single_denoiser_device == nullptr) {
@@ -142,12 +147,13 @@ DenoiseParams get_effective_denoise_params(Device *denoiser_device,
unique_ptr<Denoiser> Denoiser::create(Device *denoiser_device,
Device *cpu_fallback_device,
const DenoiseParams &params)
const DenoiseParams &params,
const GraphicsInteropDevice &interop_device)
{
Device *single_denoiser_device;
const DenoiseParams effective_denoiser_params = get_effective_denoise_params(
denoiser_device, cpu_fallback_device, params, single_denoiser_device);
denoiser_device, cpu_fallback_device, params, interop_device, single_denoiser_device);
const bool is_cpu_denoiser_device = single_denoiser_device->info.type == DEVICE_CPU;
if (is_cpu_denoiser_device == false) {

View File

@@ -17,6 +17,7 @@ CCL_NAMESPACE_BEGIN
class BufferParams;
class Device;
class GraphicsInteropDevice;
class RenderBuffers;
class Progress;
@@ -27,6 +28,7 @@ bool use_gpu_oidn_denoiser(Device *denoiser_device, const DenoiseParams &params)
DenoiseParams get_effective_denoise_params(Device *denoiser_device,
Device *cpu_fallback_device,
const DenoiseParams &params,
const GraphicsInteropDevice &interop_device,
Device *&single_denoiser_device);
/* Implementation of a specific denoising algorithm.
@@ -44,10 +46,12 @@ class Denoiser {
* This is checked in debug builds.
* - The device might be MultiDevice.
* - If Denoiser from params is not supported by provided denoise device, then Blender will
* fallback on the OIDN CPU denoising and use provided cpu_fallback_device. */
* fallback on the OIDN CPU denoising and use provided cpu_fallback_device.
* - Specifying the graphics interop device helps pick a more efficient denoising device. */
static unique_ptr<Denoiser> create(Device *denoiser_device,
Device *cpu_fallback_device,
const DenoiseParams &params);
const DenoiseParams &params,
const GraphicsInteropDevice &interop_device);
virtual ~Denoiser() = default;

View File

@@ -15,6 +15,7 @@
#include "scene/pass.h"
#include "scene/scene.h"
#include "session/display_driver.h"
#include "session/tile.h"
#include "util/log.h"
@@ -496,10 +497,15 @@ void PathTrace::set_denoiser_params(const DenoiseParams &params)
return;
}
GraphicsInteropDevice interop_device;
if (display_) {
interop_device = display_->graphics_interop_get_device();
}
Device *effective_denoise_device;
Device *cpu_fallback_device = cpu_device_.get();
const DenoiseParams effective_denoise_params = get_effective_denoise_params(
denoise_device_, cpu_fallback_device, params, effective_denoise_device);
denoise_device_, cpu_fallback_device, params, interop_device, effective_denoise_device);
bool need_to_recreate_denoiser = false;
if (denoiser_) {
@@ -540,7 +546,7 @@ void PathTrace::set_denoiser_params(const DenoiseParams &params)
if (need_to_recreate_denoiser) {
denoiser_ = Denoiser::create(
effective_denoise_device, cpu_fallback_device, effective_denoise_params);
effective_denoise_device, cpu_fallback_device, effective_denoise_params, interop_device);
if (denoiser_) {
/* Only take into account the "immediate" cancel to have interactive rendering responding to

View File

@@ -186,7 +186,12 @@ void PathTraceDisplay::unmap_texture_buffer()
* Graphics interoperability.
*/
DisplayDriver::GraphicsInterop PathTraceDisplay::graphics_interop_get()
GraphicsInteropDevice PathTraceDisplay::graphics_interop_get_device()
{
return driver_->graphics_interop_get_device();
}
GraphicsInteropBuffer PathTraceDisplay::graphics_interop_get_buffer()
{
DCHECK(!texture_buffer_state_.is_mapped);
DCHECK(update_state_.is_active);
@@ -194,18 +199,18 @@ DisplayDriver::GraphicsInterop PathTraceDisplay::graphics_interop_get()
if (texture_buffer_state_.is_mapped) {
LOG(ERROR)
<< "Attempt to use graphics interoperability mode while the texture buffer is mapped.";
return DisplayDriver::GraphicsInterop();
return GraphicsInteropBuffer();
}
if (!update_state_.is_active) {
LOG(ERROR) << "Attempt to use graphics interoperability outside of PathTraceDisplay update.";
return DisplayDriver::GraphicsInterop();
return GraphicsInteropBuffer();
}
/* Assume that interop will write new values to the texture. */
mark_texture_updated();
return driver_->graphics_interop_get();
return driver_->graphics_interop_get_buffer();
}
void PathTraceDisplay::graphics_interop_activate()

View File

@@ -110,12 +110,13 @@ class PathTraceDisplay {
* device. Complementary part of DeviceGraphicsInterop.
*
* NOTE: Graphics interoperability can not be used while the texture buffer is mapped. This means
* that `graphics_interop_get()` is not allowed between `map_texture_buffer()` and
* that `graphics_interop_get_buffer()` is not allowed between `map_texture_buffer()` and
* `unmap_texture_buffer()` calls. */
/* Get PathTraceDisplay graphics interoperability information which acts as a destination for the
* device API. */
DisplayDriver::GraphicsInterop graphics_interop_get();
GraphicsInteropDevice graphics_interop_get_device();
GraphicsInteropBuffer graphics_interop_get_buffer();
/* (De)activate GPU display for graphics interoperability outside of regular display update
* routines. */
@@ -136,7 +137,7 @@ class PathTraceDisplay {
* after clear will write new pixel values for an updating area, leaving everything else zeroed.
*
* If the GPU display supports graphics interoperability then the zeroing the display is to be
* delegated to the device via the `DisplayDriver::GraphicsInterop`. */
* delegated to the device via the `GraphicsInteropBuffer`. */
void clear();
/* Draw the current state of the texture.

View File

@@ -934,7 +934,7 @@ int PathTraceWorkGPU::num_active_main_paths_paths()
return num_paths;
}
bool PathTraceWorkGPU::should_use_graphics_interop()
bool PathTraceWorkGPU::should_use_graphics_interop(PathTraceDisplay *display)
{
/* There are few aspects with the graphics interop when using multiple devices caused by the fact
* that the PathTraceDisplay has a single texture:
@@ -948,7 +948,8 @@ bool PathTraceWorkGPU::should_use_graphics_interop()
if (!interop_use_checked_) {
Device *device = queue_->device;
interop_use_ = device->should_use_graphics_interop();
interop_use_ = device->should_use_graphics_interop(display->graphics_interop_get_device(),
true);
if (interop_use_) {
VLOG_INFO << "Using graphics interop GPU display update.";
@@ -978,7 +979,7 @@ void PathTraceWorkGPU::copy_to_display(PathTraceDisplay *display,
return;
}
if (should_use_graphics_interop()) {
if (should_use_graphics_interop(display)) {
if (copy_to_display_interop(display, pass_mode, num_samples)) {
return;
}
@@ -1040,8 +1041,8 @@ bool PathTraceWorkGPU::copy_to_display_interop(PathTraceDisplay *display,
device_graphics_interop_ = queue_->graphics_interop_create();
}
const DisplayDriver::GraphicsInterop graphics_interop_dst = display->graphics_interop_get();
device_graphics_interop_->set_display_interop(graphics_interop_dst);
const GraphicsInteropBuffer interop_buffer = display->graphics_interop_get_buffer();
device_graphics_interop_->set_buffer(interop_buffer);
const device_ptr d_rgba_half = device_graphics_interop_->map();
if (!d_rgba_half) {

View File

@@ -84,7 +84,7 @@ class PathTraceWorkGPU : public PathTraceWork {
int num_active_main_paths_paths();
/* Check whether graphics interop can be used for the PathTraceDisplay update. */
bool should_use_graphics_interop();
bool should_use_graphics_interop(PathTraceDisplay *display);
/* Naive implementation of the `copy_to_display()` which performs film conversion on the
* device, then copies pixels to the host and pushes them to the `display`. */

View File

@@ -5,6 +5,7 @@
#include "session/denoising.h"
#include "device/cpu/device.h"
#include "session/display_driver.h"
#include "util/map.h"
#include "util/task.h"
@@ -614,7 +615,7 @@ DenoiserPipeline::DenoiserPipeline(DeviceInfo &denoiser_device_info, const Denoi
device_cpu_info(cpu_devices);
cpu_device = device_cpu_create(cpu_devices[0], device->stats, device->profiler, true);
denoiser = Denoiser::create(device.get(), cpu_device.get(), params);
denoiser = Denoiser::create(device.get(), cpu_device.get(), params, GraphicsInteropDevice());
if (denoiser) {
denoiser->load_kernels(nullptr);
}

View File

@@ -6,9 +6,58 @@
#include "util/half.h"
#include "util/types.h"
#include "util/vector.h"
CCL_NAMESPACE_BEGIN
/* Info about the display device that will be used for graphics interop, so it
* can be verified if interop is compatible with the rendering device. */
class GraphicsInteropDevice {
public:
/* Graphics API used by the display. NONE disables interop entirely. */
enum Type {
NONE,
OPENGL,
};
/* API backing the display device. */
Type type = NONE;
/* Identifier of the physical device, when the API provides one; used to check
 * that the rendering and display devices match — presumably empty when the API
 * exposes no such ID (TODO confirm against callers). */
vector<uint8_t> uuid;
};
/* Handle to a native graphics API pixel buffer. If supported, the rendering device
* may write directly to this buffer instead of calling map_texture_buffer() and
* unmap_texture_buffer().
*
* This must be a pixel buffer with the specified width and height, and half float
* with RGBA channels. */
class GraphicsInteropBuffer {
public:
/* Dimensions of the buffer, in pixels. */
int width = 0;
int height = 0;
/* Graphics API that owns the buffer referenced by `handle`. */
/* The handle is expected to be:
* - OpenGL: pixel buffer object ID. */
GraphicsInteropDevice::Type type = GraphicsInteropDevice::NONE;
int64_t handle = 0;
/* Actual size of the memory, which must be >= width * height * sizeof(half4). */
size_t size = 0;
/* Clear the entire buffer before doing partial write to it. */
bool need_clear = false;
/* Enforce re-creation of the graphics interop object.
*
* When this field is true then the graphics interop will be re-created no matter what the
* rest of the configuration is.
* When this field is false the graphics interop will be re-created if the PBO or buffer size
* did change.
*
* This allows to ensure graphics interop is re-created when there is a possibility that an
* underlying PBO was re-allocated but did not change its ID. */
bool need_recreate = false;
};
/* Display driver for efficient interactive display of renders.
*
* Host applications implement this interface for viewport rendering. For best performance, we
@@ -76,36 +125,14 @@ class DisplayDriver {
virtual half4 *map_texture_buffer() = 0;
virtual void unmap_texture_buffer() = 0;
/* Optionally return a handle to a native graphics API texture buffer. If supported,
* the rendering device may write directly to this buffer instead of calling
* map_texture_buffer() and unmap_texture_buffer(). */
class GraphicsInterop {
public:
/* Dimensions of the buffer, in pixels. */
int buffer_width = 0;
int buffer_height = 0;
/* OpenGL pixel buffer object. */
int64_t opengl_pbo_id = 0;
/* Clear the entire buffer before doing partial write to it. */
bool need_clear = false;
/* Enforce re-creation of the graphics interop object.
*
* When this field is true then the graphics interop will be re-created no matter what the
* rest of the configuration is.
* When this field is false the graphics interop will be re-created if the PBO or buffer size
* did change.
*
* This allows to ensure graphics interop is re-created when there is a possibility that an
* underlying PBO was re-allocated but did not change its ID. */
bool need_recreate = false;
};
virtual GraphicsInterop graphics_interop_get()
virtual GraphicsInteropDevice graphics_interop_get_device()
{
return GraphicsInterop();
return GraphicsInteropDevice();
}
virtual GraphicsInteropBuffer graphics_interop_get_buffer()
{
/* Default: no interop buffer is provided (type NONE, zero handle), so the
 * renderer must use map_texture_buffer()/unmap_texture_buffer() instead. */
return GraphicsInteropBuffer();
}
/* (De)activate graphics context required for editing or deleting the graphics interop