diff --git a/scripts/startup/bl_ui/properties_render.py b/scripts/startup/bl_ui/properties_render.py index 9c31af7c8b0..c42afa0d392 100644 --- a/scripts/startup/bl_ui/properties_render.py +++ b/scripts/startup/bl_ui/properties_render.py @@ -777,6 +777,8 @@ class CompositorDenoisePerformanceButtonsPanel: layout.use_property_decorate = False col = layout.column() + row = col.row() + row.prop(rd, "compositor_denoise_device", text="Denoising Device", expand=True) col.prop(rd, "compositor_denoise_preview_quality", text="Preview Quality") col.prop(rd, "compositor_denoise_final_quality", text="Final Quality") diff --git a/source/blender/compositor/CMakeLists.txt b/source/blender/compositor/CMakeLists.txt index 44b04f74e2f..74bd8f0767b 100644 --- a/source/blender/compositor/CMakeLists.txt +++ b/source/blender/compositor/CMakeLists.txt @@ -142,9 +142,11 @@ set(SRC derived_resources/COM_denoised_auxiliary_pass.hh utilities/intern/gpu_material.cc + utilities/intern/oidn.cc utilities/COM_utilities_diagonals.hh utilities/COM_utilities_gpu_material.hh + utilities/COM_utilities_oidn.hh ) set(LIB diff --git a/source/blender/compositor/derived_resources/intern/denoised_auxiliary_pass.cc b/source/blender/compositor/derived_resources/intern/denoised_auxiliary_pass.cc index 4cdef6062aa..fe3daafb815 100644 --- a/source/blender/compositor/derived_resources/intern/denoised_auxiliary_pass.cc +++ b/source/blender/compositor/derived_resources/intern/denoised_auxiliary_pass.cc @@ -9,6 +9,7 @@ # include "BLI_assert.h" # include "BLI_hash.hh" +# include "BLI_span.hh" # include "MEM_guardedalloc.h" @@ -18,6 +19,7 @@ # include "COM_context.hh" # include "COM_denoised_auxiliary_pass.hh" # include "COM_result.hh" +# include "COM_utilities_oidn.hh" # include @@ -89,29 +91,31 @@ DenoisedAuxiliaryPass::DenoisedAuxiliaryPass(Context &context, /* Float3 results might be stored in 4-component textures due to hardware limitations, so we * need to use the pixel stride of the texture. 
*/ - const int pixel_stride = sizeof(float) * - (context.use_gpu() ? - GPU_texture_component_len(GPU_texture_format(pass)) : - pass.channels_count()); + const int channels_count = context.use_gpu() ? + GPU_texture_component_len(GPU_texture_format(pass)) : + pass.channels_count(); + const int pixel_stride = sizeof(float) * channels_count; - oidn::DeviceRef device = oidn::newDevice(oidn::DeviceType::CPU); + oidn::DeviceRef device = create_oidn_device(context); device.commit(); + const int64_t buffer_size = int64_t(width) * height * channels_count; + const MutableSpan buffer_span = MutableSpan(this->denoised_buffer, buffer_size); + oidn::BufferRef buffer = create_oidn_buffer(device, buffer_span); + /* Denoise the pass in place, so set it to both the input and output. */ oidn::FilterRef filter = device.newFilter("RT"); - filter.setImage(get_pass_name(type), - this->denoised_buffer, - oidn::Format::Float3, - width, - height, - 0, - pixel_stride); - filter.setImage( - "output", this->denoised_buffer, oidn::Format::Float3, width, height, 0, pixel_stride); + const char *pass_name = get_pass_name(type); + filter.setImage(pass_name, buffer, oidn::Format::Float3, width, height, 0, pixel_stride); + filter.setImage("output", buffer, oidn::Format::Float3, width, height, 0, pixel_stride); filter.set("quality", quality); filter.setProgressMonitorFunction(oidn_progress_monitor_function, &context); filter.commit(); filter.execute(); + + if (buffer.getStorage() != oidn::Storage::Host) { + buffer.read(0, buffer_size * sizeof(float), this->denoised_buffer); + } } DenoisedAuxiliaryPass::~DenoisedAuxiliaryPass() diff --git a/source/blender/compositor/utilities/COM_utilities_oidn.hh b/source/blender/compositor/utilities/COM_utilities_oidn.hh new file mode 100644 index 00000000000..3a798a9fec3 --- /dev/null +++ b/source/blender/compositor/utilities/COM_utilities_oidn.hh @@ -0,0 +1,32 @@ +/* SPDX-FileCopyrightText: 2025 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + 
+#pragma once + +#ifdef WITH_OPENIMAGEDENOISE + +# include "BLI_span.hh" + +# include "COM_context.hh" + +# include + +namespace blender::compositor { + +/* Create an appropriate device based on the device preferences in the given context. Special + * attention is given to GPU devices, as multiple GPUs could exist, so the same GPU device used in + * the active GPU context is chosen. If no GPU context is active, OIDN chooses the best device, + * which is typically the fastest in the system. Such device selection makes execution more + * predictable and allows interoperability across APIs. */ +oidn::DeviceRef create_oidn_device(const Context &context); + +/* Creates a buffer on the given device that represents the given image. If the device can access + * host-side data, the returned buffer is a simple wrapper around the data, otherwise, the data is + * copied to a device-only buffer. It is thus expected that the given image data will outlive the + * returned buffer. */ +oidn::BufferRef create_oidn_buffer(const oidn::DeviceRef &device, const MutableSpan image); + +} // namespace blender::compositor + +#endif diff --git a/source/blender/compositor/utilities/intern/oidn.cc b/source/blender/compositor/utilities/intern/oidn.cc new file mode 100644 index 00000000000..2388bdc6545 --- /dev/null +++ b/source/blender/compositor/utilities/intern/oidn.cc @@ -0,0 +1,111 @@ +/* SPDX-FileCopyrightText: 2025 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifdef WITH_OPENIMAGEDENOISE + +# include + +# include "BLI_array.hh" +# include "BLI_assert.h" +# include "BLI_span.hh" + +# include "GPU_platform.hh" + +# include "COM_context.hh" +# include "COM_utilities_oidn.hh" + +# include + +namespace blender::compositor { + +/* Creates a device for the GPU of the active GPU context, or OIDN's default device otherwise. */ +static oidn::DeviceRef create_oidn_gpu_device(const Context &context) +{ + /* The compositor uses CPU execution and does not have an active GPU context or device, so let + * OIDN select the best device, which is typically the fastest. 
*/ + if (!context.use_gpu()) { + return oidn::newDevice(oidn::DeviceType::Default); + } + + /* Try to select the device that is used by the currently active GPU context. First, try to + * select the device based on the device LUID. */ + const Span platform_luid = GPU_platform_luid(); + const uint32_t platform_luid_node_mask = GPU_platform_luid_node_mask(); + const int devices_count = oidn::getNumPhysicalDevices(); + for (int i = 0; i < devices_count; i++) { + oidn::PhysicalDeviceRef physical_device(i); + if (!physical_device.get("luidSupported")) { + continue; + } + + oidn::LUID luid = physical_device.get("luid"); + uint32_t luid_node_mask = physical_device.get("nodeMask"); + if (platform_luid == Span(luid.bytes, sizeof(luid.bytes)) && + platform_luid_node_mask == luid_node_mask) + { + return physical_device.newDevice(); + } + } + + /* If LUID matching was unsuccessful, try to match based on UUID. We rely on multiple selection + * methods because not all platforms support both UUID and LUID, but all platforms support either + * one of them. UUID supports all except MacOS Metal, while LUID only supports Windows and MacOS + * Metal. Note that we prefer LUID as a first match because UUID is unreliable in practice as + * some implementations report the same UUID for different devices in the same machine. 
*/ + const Span platform_uuid = GPU_platform_uuid(); + for (int i = 0; i < devices_count; i++) { + oidn::PhysicalDeviceRef physical_device(i); + if (!physical_device.get("uuidSupported")) { + continue; + } + + oidn::UUID uuid = physical_device.get("uuid"); + if (platform_uuid == Span(uuid.bytes, sizeof(uuid.bytes))) { + return physical_device.newDevice(); + } + } + + return oidn::newDevice(oidn::DeviceType::Default); +} + +oidn::DeviceRef create_oidn_device(const Context &context) +{ + const eCompositorDenoiseDevice preferred_denoise_device = static_cast( + context.get_render_data().compositor_denoise_device); + + switch (preferred_denoise_device) { + case SCE_COMPOSITOR_DENOISE_DEVICE_CPU: + return oidn::newDevice(oidn::DeviceType::CPU); + case SCE_COMPOSITOR_DENOISE_DEVICE_GPU: + return create_oidn_gpu_device(context); + case SCE_COMPOSITOR_DENOISE_DEVICE_AUTO: + if (!context.use_gpu()) { + return oidn::newDevice(oidn::DeviceType::CPU); + } + else { + return create_oidn_gpu_device(context); + } + } + + BLI_assert_unreachable(); + return oidn::newDevice(oidn::DeviceType::Default); +} + +oidn::BufferRef create_oidn_buffer(const oidn::DeviceRef &device, const MutableSpan image) +{ + /* The device can access host-side data, so create a shared buffer that wraps the data. */ + const bool can_access_host_memory = device.get("systemMemorySupported"); + if (can_access_host_memory) { + return device.newBuffer(image.data(), image.size_in_bytes()); + } + + /* Otherwise, create a device-only buffer and copy the data to it. 
*/ + oidn::BufferRef buffer = device.newBuffer(image.size_in_bytes(), oidn::Storage::Device); + buffer.write(0, image.size_in_bytes(), image.data()); + return buffer; +} + +} // namespace blender::compositor + +#endif diff --git a/source/blender/gpu/GPU_platform.hh b/source/blender/gpu/GPU_platform.hh index ad4ae4826c2..d597a27b58b 100644 --- a/source/blender/gpu/GPU_platform.hh +++ b/source/blender/gpu/GPU_platform.hh @@ -8,6 +8,8 @@ #pragma once +#include +#include #include #include "BLI_span.hh" @@ -90,3 +92,11 @@ const char *GPU_platform_support_level_key(); const char *GPU_platform_gpu_name(); GPUArchitectureType GPU_platform_architecture(); blender::Span GPU_platform_devices_list(); + +/* The UUID of the device. Can be an empty array, since it is not supported on all platforms. */ +blender::Span GPU_platform_uuid(); +/* The LUID of the device. Can be an empty array, since it is not supported on all platforms. */ +blender::Span GPU_platform_luid(); +/* A bit field with the nth bit active identifying the nth device with the same LUID. Only matters + * if LUID is defined. */ +uint32_t GPU_platform_luid_node_mask(); diff --git a/source/blender/gpu/intern/gpu_platform.cc b/source/blender/gpu/intern/gpu_platform.cc index 52d8e33ab62..899504afb05 100644 --- a/source/blender/gpu/intern/gpu_platform.cc +++ b/source/blender/gpu/intern/gpu_platform.cc @@ -9,6 +9,8 @@ * with checks for drivers and GPU support. 
*/ +#include + #include "MEM_guardedalloc.h" #include "BLI_dynstr.h" @@ -104,6 +106,9 @@ void GPUPlatformGlobal::clear() MEM_SAFE_FREE(support_key); MEM_SAFE_FREE(gpu_name); devices.clear_and_shrink(); + device_uuid.reinitialize(0); + device_luid.reinitialize(0); + device_luid_node_mask = 0; initialized = false; } @@ -179,4 +184,19 @@ blender::Span GPU_platform_devices_list() return GPG.devices.as_span(); } +blender::Span GPU_platform_uuid() +{ + return GPG.device_uuid.as_span(); +} + +blender::Span GPU_platform_luid() +{ + return GPG.device_luid.as_span(); +} + +uint32_t GPU_platform_luid_node_mask() +{ + return GPG.device_luid_node_mask; +} + /** \} */ diff --git a/source/blender/gpu/intern/gpu_platform_private.hh b/source/blender/gpu/intern/gpu_platform_private.hh index cd05886a617..7f0a206bd35 100644 --- a/source/blender/gpu/intern/gpu_platform_private.hh +++ b/source/blender/gpu/intern/gpu_platform_private.hh @@ -8,6 +8,9 @@ #pragma once +#include + +#include "BLI_array.hh" #include "BLI_vector.hh" #include "GPU_platform.hh" @@ -30,6 +33,14 @@ class GPUPlatformGlobal { GPUArchitectureType architecture_type = GPU_ARCHITECTURE_IMR; Vector devices; + /* The UUID of the device. Can be an empty array, since it is not supported on all platforms. */ + Array device_uuid; + /* The LUID of the device. Can be an empty array, since it is not supported on all platforms. */ + Array device_luid; + /* A bit field with the nth bit active identifying the nth device with the same LUID. Only + * matters if device_luid is defined. 
*/ + uint32_t device_luid_node_mask = 0; + void init(eGPUDeviceType gpu_device, eGPUOSType os_type, eGPUDriverType driver_type, diff --git a/source/blender/gpu/metal/mtl_backend.mm b/source/blender/gpu/metal/mtl_backend.mm index 5f4c7e1a1de..d0be87aec83 100644 --- a/source/blender/gpu/metal/mtl_backend.mm +++ b/source/blender/gpu/metal/mtl_backend.mm @@ -6,6 +6,8 @@ * \ingroup gpu */ +#include + #include "BKE_global.hh" #include "gpu_backend.hh" @@ -243,6 +245,17 @@ void MTLBackend::platform_init(MTLContext *ctx) renderer, version, architecture_type); + + /* UUID is not supported on Metal. */ + GPG.device_uuid.reinitialize(0); + + /* LUID is registryID on Metal, or at least this is what libraries like OIDN expect. */ + const uint64_t luid = mtl_device.registryID; + GPG.device_luid.reinitialize(sizeof(luid)); + std::memcpy(GPG.device_luid.data(), &luid, sizeof(luid)); + + /* Metal only has one device per LUID, so only the first bit is ever active. */ + GPG.device_luid_node_mask = 1; } void MTLBackend::platform_exit() diff --git a/source/blender/gpu/opengl/gl_backend.cc b/source/blender/gpu/opengl/gl_backend.cc index 62bb3acc3d4..e193fc4dbfe 100644 --- a/source/blender/gpu/opengl/gl_backend.cc +++ b/source/blender/gpu/opengl/gl_backend.cc @@ -6,12 +6,15 @@ * \ingroup gpu */ +#include #include #include "BKE_global.hh" #if defined(WIN32) # include "BLI_winstuff.h" #endif +#include "BLI_array.hh" +#include "BLI_span.hh" #include "BLI_string_ref.hh" #include "BLI_subprocess.hh" #include "BLI_threads.h" @@ -230,6 +233,33 @@ void GLBackend::platform_init() renderer, version, GPU_ARCHITECTURE_IMR); + + GPG.device_uuid.reinitialize(0); + GPG.device_luid.reinitialize(0); + GPG.device_luid_node_mask = 0; + + if (epoxy_has_gl_extension("GL_EXT_memory_object")) { + GLint number_of_devices = 0; + glGetIntegerv(GL_NUM_DEVICE_UUIDS_EXT, &number_of_devices); + /* Multiple devices could be used by the context if certain extensions like multi-cast is used. 
+ * But this is not used by Blender, so this should always be 1. */ + BLI_assert(number_of_devices == 1); + + GLubyte device_uuid[GL_UUID_SIZE_EXT] = {0}; + glGetUnsignedBytei_vEXT(GL_DEVICE_UUID_EXT, 0, device_uuid); + GPG.device_uuid = Array(Span(device_uuid, GL_UUID_SIZE_EXT)); + + /* LUID is only supported on Windows. */ + if (epoxy_has_gl_extension("GL_EXT_memory_object_win32") && (os & GPU_OS_WIN)) { + GLubyte device_luid[GL_LUID_SIZE_EXT] = {0}; + glGetUnsignedBytevEXT(GL_DEVICE_LUID_EXT, device_luid); + GPG.device_luid = Array(Span(device_luid, GL_LUID_SIZE_EXT)); + + GLint node_mask = 0; + glGetIntegerv(GL_DEVICE_NODE_MASK_EXT, &node_mask); + GPG.device_luid_node_mask = uint32_t(node_mask); + } + } } void GLBackend::platform_exit() diff --git a/source/blender/gpu/vulkan/vk_backend.cc b/source/blender/gpu/vulkan/vk_backend.cc index 7dacdf8d336..fa065e2c54f 100644 --- a/source/blender/gpu/vulkan/vk_backend.cc +++ b/source/blender/gpu/vulkan/vk_backend.cc @@ -331,6 +331,20 @@ void VKBackend::platform_init(const VKDevice &device) GPU_ARCHITECTURE_IMR); GPG.devices = devices; + const VkPhysicalDeviceIDProperties &id_properties = device.physical_device_id_properties_get(); + + GPG.device_uuid = Array(Span(id_properties.deviceUUID, VK_UUID_SIZE)); + + /* The LUID and node mask are only defined when the driver reports the LUID as valid. */ + if (id_properties.deviceLUIDValid) { + GPG.device_luid = Array(Span(id_properties.deviceLUID, VK_LUID_SIZE)); + GPG.device_luid_node_mask = id_properties.deviceNodeMask; + } + else { + GPG.device_luid.reinitialize(0); + GPG.device_luid_node_mask = 0; + } + CLOG_INFO(&LOG, 0, "Using vendor [%s] device [%s] driver version [%s].", diff --git a/source/blender/gpu/vulkan/vk_device.cc b/source/blender/gpu/vulkan/vk_device.cc index 44ae8cbda57..5d315f4f914 100644 --- a/source/blender/gpu/vulkan/vk_device.cc +++ b/source/blender/gpu/vulkan/vk_device.cc @@ -149,7 +149,9 @@ void VKDevice::init_physical_device_properties() vk_physical_device_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; 
vk_physical_device_driver_properties_.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES; + vk_physical_device_id_properties_.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES; vk_physical_device_properties.pNext = &vk_physical_device_driver_properties_; + vk_physical_device_driver_properties_.pNext = &vk_physical_device_id_properties_; vkGetPhysicalDeviceProperties2(vk_physical_device_, &vk_physical_device_properties); vk_physical_device_properties_ = vk_physical_device_properties.properties; diff --git a/source/blender/gpu/vulkan/vk_device.hh b/source/blender/gpu/vulkan/vk_device.hh index 18048c3d51f..26dc4a8cf3e 100644 --- a/source/blender/gpu/vulkan/vk_device.hh +++ b/source/blender/gpu/vulkan/vk_device.hh @@ -193,6 +193,7 @@ class VKDevice : public NonCopyable { /** Limits of the device linked to this context. */ VkPhysicalDeviceProperties vk_physical_device_properties_ = {}; VkPhysicalDeviceDriverProperties vk_physical_device_driver_properties_ = {}; + VkPhysicalDeviceIDProperties vk_physical_device_id_properties_ = {}; VkPhysicalDeviceMemoryProperties vk_physical_device_memory_properties_ = {}; /** Features support. */ VkPhysicalDeviceFeatures vk_physical_device_features_ = {}; @@ -248,6 +249,11 @@ class VKDevice : public NonCopyable { return vk_physical_device_properties_; } + const VkPhysicalDeviceIDProperties &physical_device_id_properties_get() const + { + return vk_physical_device_id_properties_; + } + const VkPhysicalDeviceFeatures &physical_device_features_get() const { return vk_physical_device_features_; diff --git a/source/blender/makesdna/DNA_scene_types.h b/source/blender/makesdna/DNA_scene_types.h index 5ecae861002..182427e0a76 100644 --- a/source/blender/makesdna/DNA_scene_types.h +++ b/source/blender/makesdna/DNA_scene_types.h @@ -830,9 +830,14 @@ typedef struct RenderData { /** Precision used by the GPU execution of the compositor tree. 
*/ int compositor_precision; /* eCompositorPrecision */ + /** Device to use for denoise nodes in the compositor. */ + int compositor_denoise_device; /* eCompositorDenoiseDevice */ + /** Global configuration for denoise compositor nodes. */ int compositor_denoise_preview_quality; /* eCompositorDenoiseQaulity */ int compositor_denoise_final_quality; /* eCompositorDenoiseQaulity */ + + char _pad6[4]; } RenderData; /** #RenderData::quality_flag */ @@ -865,6 +870,13 @@ typedef enum eCompositorPrecision { SCE_COMPOSITOR_PRECISION_FULL = 1, } eCompositorPrecision; +/** #RenderData::compositor_denoise_device */ +typedef enum eCompositorDenoiseDevice { + SCE_COMPOSITOR_DENOISE_DEVICE_AUTO = 0, + SCE_COMPOSITOR_DENOISE_DEVICE_CPU = 1, + SCE_COMPOSITOR_DENOISE_DEVICE_GPU = 2, +} eCompositorDenoiseDevice; + /** #RenderData::compositor_denoise_preview_quality */ /** #RenderData::compositor_denoise_final_quality */ typedef enum eCompositorDenoiseQaulity { diff --git a/source/blender/makesrna/intern/rna_scene.cc b/source/blender/makesrna/intern/rna_scene.cc index 099dccb397d..44b3b2094e2 100644 --- a/source/blender/makesrna/intern/rna_scene.cc +++ b/source/blender/makesrna/intern/rna_scene.cc @@ -6826,6 +6826,25 @@ static void rna_def_scene_render_data(BlenderRNA *brna) {0, nullptr, 0, nullptr, nullptr}, }; + static const EnumPropertyItem compositor_denoise_device_items[] = { + {SCE_COMPOSITOR_DENOISE_DEVICE_AUTO, + "AUTO", + 0, + "Auto", + "Use the same device used by the compositor to process the denoise node"}, + {SCE_COMPOSITOR_DENOISE_DEVICE_CPU, + "CPU", + 0, + "CPU", + "Use the CPU to process the denoise node"}, + {SCE_COMPOSITOR_DENOISE_DEVICE_GPU, + "GPU", + 0, + "GPU", + "Use the GPU to process the denoise node if available, otherwise fallback to CPU"}, + {0, nullptr, 0, nullptr, nullptr}, + }; + static const EnumPropertyItem compositor_denoise_quality_items[] = { {SCE_COMPOSITOR_DENOISE_HIGH, "HIGH", 0, "High", "High quality"}, {SCE_COMPOSITOR_DENOISE_BALANCED, @@ 
-7578,6 +7597,15 @@ static void rna_def_scene_render_data(BlenderRNA *brna) prop, "Compositor Precision", "The precision of compositor intermediate result"); RNA_def_property_update(prop, NC_NODE | ND_DISPLAY, "rna_Scene_compositor_update"); + prop = RNA_def_property(srna, "compositor_denoise_device", PROP_ENUM, PROP_NONE); + RNA_def_property_enum_sdna(prop, nullptr, "compositor_denoise_device"); + RNA_def_property_enum_items(prop, compositor_denoise_device_items); + RNA_def_property_enum_default(prop, SCE_COMPOSITOR_DENOISE_DEVICE_AUTO); + RNA_def_property_ui_text(prop, + "Compositor Denoise Node Device", + "The device to use to process the denoise nodes in the compositor"); + RNA_def_property_update(prop, NC_NODE | ND_DISPLAY, "rna_Scene_compositor_update"); + prop = RNA_def_property(srna, "compositor_denoise_preview_quality", PROP_ENUM, PROP_NONE); RNA_def_property_enum_sdna(prop, nullptr, "compositor_denoise_preview_quality"); RNA_def_property_enum_items(prop, compositor_denoise_quality_items); diff --git a/source/blender/nodes/composite/nodes/node_composite_denoise.cc b/source/blender/nodes/composite/nodes/node_composite_denoise.cc index a6ba7914784..3e22fc9b55c 100644 --- a/source/blender/nodes/composite/nodes/node_composite_denoise.cc +++ b/source/blender/nodes/composite/nodes/node_composite_denoise.cc @@ -10,6 +10,8 @@ # include "BLI_system.h" #endif +#include "BLI_span.hh" + #include "MEM_guardedalloc.h" #include "UI_interface.hh" @@ -24,6 +26,7 @@ #include "COM_derived_resources.hh" #include "COM_node_operation.hh" #include "COM_utilities.hh" +#include "COM_utilities_oidn.hh" #include "node_composite_util.hh" @@ -126,7 +129,7 @@ class DenoiseOperation : public NodeOperation { output_image.allocate_texture(input_image.domain()); #ifdef WITH_OPENIMAGEDENOISE - oidn::DeviceRef device = oidn::newDevice(oidn::DeviceType::CPU); + oidn::DeviceRef device = create_oidn_device(this->context()); device.set("setAffinity", false); device.commit(); @@ -151,9 +154,16 @@ 
class DenoiseOperation : public NodeOperation { input_color = const_cast(static_cast(input_image.cpu_data().data())); output_color = static_cast(output_image.cpu_data().data()); } + + const int64_t buffer_size = int64_t(width) * height * input_image.channels_count(); + const MutableSpan input_buffer_span = MutableSpan(input_color, buffer_size); + oidn::BufferRef input_buffer = create_oidn_buffer(device, input_buffer_span); + const MutableSpan output_buffer_span = MutableSpan(output_color, buffer_size); + oidn::BufferRef output_buffer = create_oidn_buffer(device, output_buffer_span); + oidn::FilterRef filter = device.newFilter("RT"); - filter.setImage("color", input_color, oidn::Format::Float3, width, height, 0, pixel_stride); - filter.setImage("output", output_color, oidn::Format::Float3, width, height, 0, pixel_stride); + filter.setImage("color", input_buffer, oidn::Format::Float3, width, height, 0, pixel_stride); + filter.setImage("output", output_buffer, oidn::Format::Float3, width, height, 0, pixel_stride); filter.set("hdr", use_hdr()); filter.set("cleanAux", auxiliary_passes_are_clean()); this->set_filter_quality(filter); @@ -183,7 +193,11 @@ class DenoiseOperation : public NodeOperation { } } - filter.setImage("albedo", albedo, oidn::Format::Float3, width, height, 0, pixel_stride); + const MutableSpan albedo_buffer_span = MutableSpan(albedo, buffer_size); + oidn::BufferRef albedo_buffer = create_oidn_buffer(device, albedo_buffer_span); + + filter.setImage( + "albedo", albedo_buffer, oidn::Format::Float3, width, height, 0, pixel_stride); } /* If the albedo and normal inputs are not single value inputs, set the normal input to the @@ -213,18 +227,27 @@ class DenoiseOperation : public NodeOperation { /* Float3 results might be stored in 4-component textures due to hardware limitations, so we * need to use the pixel stride of the texture. */ - int normal_pixel_stride = sizeof(float) * - (this->context().use_gpu() ? 
- GPU_texture_component_len(GPU_texture_format(input_normal)) : - input_normal.channels_count()); + const int normal_channels_count = this->context().use_gpu() ? + GPU_texture_component_len( + GPU_texture_format(input_normal)) : + input_normal.channels_count(); + int normal_pixel_stride = sizeof(float) * normal_channels_count; + + const int64_t normal_buffer_size = int64_t(width) * height * normal_channels_count; + const MutableSpan normal_buffer_span = MutableSpan(normal, normal_buffer_size); + oidn::BufferRef normal_buffer = create_oidn_buffer(device, normal_buffer_span); filter.setImage( - "normal", normal, oidn::Format::Float3, width, height, 0, normal_pixel_stride); + "normal", normal_buffer, oidn::Format::Float3, width, height, 0, normal_pixel_stride); } filter.commit(); filter.execute(); + if (output_buffer.getStorage() != oidn::Storage::Host) { + output_buffer.read(0, buffer_size * sizeof(float), output_color); + } + if (this->context().use_gpu()) { GPU_texture_update(output_image, data_format, output_color); }