diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py
index 8cbfc0a98b9..95f2df291df 100644
--- a/intern/cycles/blender/addon/properties.py
+++ b/intern/cycles/blender/addon/properties.py
@@ -222,7 +222,8 @@ enum_guiding_directional_sampling_types = (
 def enum_openimagedenoise_denoiser(self, context):
     import _cycles
     if _cycles.with_openimagedenoise:
-        return [('OPENIMAGEDENOISE', "OpenImageDenoise", "Use Intel OpenImageDenoise AI denoiser running on the CPU", 4)]
+        return [('OPENIMAGEDENOISE', "OpenImageDenoise",
+                 "Use Intel OpenImageDenoise AI denoiser", 4)]
     return []
 
 
diff --git a/intern/cycles/device/cpu/device.cpp b/intern/cycles/device/cpu/device.cpp
index 3024697da18..5d0959d5e6d 100644
--- a/intern/cycles/device/cpu/device.cpp
+++ b/intern/cycles/device/cpu/device.cpp
@@ -4,6 +4,7 @@
 
 #include "device/cpu/device.h"
 #include "device/cpu/device_impl.h"
+#include "integrator/denoiser_oidn.h"
 
 /* Used for `info.denoisers`. */
 /* TODO(sergey): The denoisers are probably to be moved completely out of the device into their
diff --git a/intern/cycles/device/oneapi/device.cpp b/intern/cycles/device/oneapi/device.cpp
index 2c35046ddfa..33bc006c799 100644
--- a/intern/cycles/device/oneapi/device.cpp
+++ b/intern/cycles/device/oneapi/device.cpp
@@ -9,6 +9,7 @@
 #ifdef WITH_ONEAPI
 #  include "device/device.h"
 #  include "device/oneapi/device_impl.h"
+#  include "integrator/denoiser_oidn_gpu.h"
 
 #  include "util/path.h"
 #  include "util/string.h"
@@ -107,7 +108,11 @@ static void device_iterator_cb(
   info.id = id;
 
   info.has_nanovdb = true;
-  info.denoisers = 0;
+#  if defined(WITH_OPENIMAGEDENOISE)
+  if (OIDNDenoiserGPU::is_device_supported(info)) {
+    info.denoisers |= DENOISER_OPENIMAGEDENOISE;
+  }
+#  endif
 
   info.has_gpu_queue = true;
 
diff --git a/intern/cycles/integrator/CMakeLists.txt b/intern/cycles/integrator/CMakeLists.txt
index 6e27df17f41..4ac700d74bb 100644
--- a/intern/cycles/integrator/CMakeLists.txt
+++ b/intern/cycles/integrator/CMakeLists.txt
@@ -14,6 +14,7 @@ set(SRC
   denoiser.cpp
   denoiser_gpu.cpp
   denoiser_oidn.cpp
+  denoiser_oidn_gpu.cpp
   denoiser_optix.cpp
   path_trace.cpp
   tile.cpp
@@ -36,6 +37,7 @@ set(SRC_HEADERS
   denoiser.h
   denoiser_gpu.h
   denoiser_oidn.h
+  denoiser_oidn_gpu.h
   denoiser_optix.h
   guiding.h
   path_trace.h
diff --git a/intern/cycles/integrator/denoiser.cpp b/intern/cycles/integrator/denoiser.cpp
index 2392fcd7d61..2735de9a12e 100644
--- a/intern/cycles/integrator/denoiser.cpp
+++ b/intern/cycles/integrator/denoiser.cpp
@@ -6,6 +6,9 @@
 
 #include "device/device.h"
 #include "integrator/denoiser_oidn.h"
+#ifdef WITH_OPENIMAGEDENOISE
+#  include "integrator/denoiser_oidn_gpu.h"
+#endif
 #include "integrator/denoiser_optix.h"
 #include "session/buffers.h"
 #include "util/log.h"
@@ -23,6 +26,12 @@ unique_ptr<Denoiser> Denoiser::create(Device *path_trace_device, const DenoisePa
   }
 #endif
 
+#ifdef WITH_OPENIMAGEDENOISE
+  if (params.type == DENOISER_OPENIMAGEDENOISE && path_trace_device->info.type == DEVICE_ONEAPI) {
+    return make_unique<OIDNDenoiserGPU>(path_trace_device, params);
+  }
+#endif
+
   /* Always fallback to OIDN. */
   DenoiseParams oidn_params = params;
   oidn_params.type = DENOISER_OPENIMAGEDENOISE;
@@ -131,8 +140,8 @@ static DeviceInfo find_best_denoiser_device_info(const vector<DeviceInfo> &devic
       continue;
     }
 
-    /* TODO(sergey): Use one of the already configured devices, so that OptiX denoising can happen
-     * on a physical CUDA device which is already used for rendering. */
+    /* TODO(sergey): Use one of the already configured devices, so that GPU denoising can happen
+     * on a physical device which is already used for rendering. */
 
     /* TODO(sergey): Choose fastest device for denoising. */
 
diff --git a/intern/cycles/integrator/denoiser_oidn.h b/intern/cycles/integrator/denoiser_oidn.h
index fb8f1d05b26..3828735ab14 100644
--- a/intern/cycles/integrator/denoiser_oidn.h
+++ b/intern/cycles/integrator/denoiser_oidn.h
@@ -10,7 +10,7 @@
 
 CCL_NAMESPACE_BEGIN
 
-/* Implementation of denoising API which uses OpenImageDenoise library. */
+/* Implementation of a CPU based denoiser which uses OpenImageDenoise library. */
 class OIDNDenoiser : public Denoiser {
  public:
   /* Forwardly declared state which might be using compile-flag specific fields, such as
diff --git a/intern/cycles/integrator/denoiser_oidn_gpu.cpp b/intern/cycles/integrator/denoiser_oidn_gpu.cpp
new file mode 100644
index 00000000000..308a88754b0
--- /dev/null
+++ b/intern/cycles/integrator/denoiser_oidn_gpu.cpp
@@ -0,0 +1,367 @@
+/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
+ *
+ * SPDX-License-Identifier: Apache-2.0 */
+
+#if defined(WITH_OPENIMAGEDENOISE)
+
+#  include "integrator/denoiser_oidn_gpu.h"
+
+#  include <array>
+
+#  include "device/device.h"
+#  include "device/queue.h"
+#  include "integrator/pass_accessor_cpu.h"
+#  include "session/buffers.h"
+#  include "util/array.h"
+#  include "util/log.h"
+#  include "util/openimagedenoise.h"
+
+#  include "kernel/device/cpu/compat.h"
+#  include "kernel/device/cpu/kernel.h"
+
+#  if OIDN_VERSION_MAJOR < 2
+#    define oidnSetFilterBool oidnSetFilter1b
+#    define oidnSetFilterInt oidnSetFilter1i
+#    define oidnExecuteFilterAsync oidnExecuteFilter
+#  endif
+
+CCL_NAMESPACE_BEGIN
+
+/* Ideally, this would be dynamic and adaptively change when the runtime runs out of memory. */
+constexpr int prefilter_max_mem = 1024;
+
+thread_mutex OIDNDenoiserGPU::mutex_;
+bool OIDNDenoiserGPU::is_device_type_supported(const DeviceType &type)
+{
+  switch (type) {
+#  ifdef OIDN_DEVICE_SYCL
+    /* Assume all devices with Cycles support are also supported by OIDN2. */
+    case DEVICE_ONEAPI:
+      return true;
+#  endif
+    default:
+      return false;
+  }
+}
+
+bool OIDNDenoiserGPU::is_device_supported(const DeviceInfo &device)
+{
+  /* Currently falls back to checking just the device type, can be improved. */
+  return is_device_type_supported(device.type);
+}
+
+OIDNDenoiserGPU::OIDNDenoiserGPU(Device *path_trace_device, const DenoiseParams &params)
+    : DenoiserGPU(path_trace_device, params)
+{
+  DCHECK_EQ(params.type, DENOISER_OPENIMAGEDENOISE);
+}
+
+OIDNDenoiserGPU::~OIDNDenoiserGPU()
+{
+  if (albedo_filter_) {
+    oidnReleaseFilter(albedo_filter_);
+  }
+  if (normal_filter_) {
+    oidnReleaseFilter(normal_filter_);
+  }
+  if (oidn_filter_) {
+    oidnReleaseFilter(oidn_filter_);
+  }
+  if (oidn_device_) {
+    oidnReleaseDevice(oidn_device_);
+  }
+}
+
+bool OIDNDenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
+                                     RenderBuffers *render_buffers,
+                                     const int num_samples,
+                                     bool allow_inplace_modification)
+{
+  return DenoiserGPU::denoise_buffer(
+      buffer_params, render_buffers, num_samples, allow_inplace_modification);
+}
+
+uint OIDNDenoiserGPU::get_device_type_mask() const
+{
+  uint device_mask = 0;
+#  ifdef OIDN_DEVICE_SYCL
+  device_mask |= DEVICE_MASK_ONEAPI;
+#  endif
+  return device_mask;
+}
+
+OIDNFilter OIDNDenoiserGPU::create_filter()
+{
+  const char *error_message = nullptr;
+  OIDNFilter filter = oidnNewFilter(oidn_device_, "RT");
+  if (filter == nullptr) {
+    OIDNError err = oidnGetDeviceError(oidn_device_, (const char **)&error_message);
+    if (OIDN_ERROR_NONE != err) {
+      LOG(ERROR) << "OIDN error: " << error_message;
+      denoiser_device_->set_error(error_message);
+    }
+  }
+  return filter;
+}
+
+bool OIDNDenoiserGPU::denoise_create_if_needed(DenoiseContext &context)
+{
+  const bool recreate_denoiser = (oidn_device_ == nullptr) || (oidn_filter_ == nullptr) ||
+                                 (use_pass_albedo_ != context.use_pass_albedo) ||
+                                 (use_pass_normal_ != context.use_pass_normal);
+  if (!recreate_denoiser) {
+    return true;
+  }
+
+  /* Destroy existing handles before creating new ones. Reset each pointer after releasing it so
+   * that an early return below cannot leave a dangling handle for the destructor to release a
+   * second time. */
+  if (albedo_filter_) {
+    oidnReleaseFilter(albedo_filter_);
+    albedo_filter_ = nullptr;
+  }
+  if (normal_filter_) {
+    oidnReleaseFilter(normal_filter_);
+    normal_filter_ = nullptr;
+  }
+  if (oidn_filter_) {
+    oidnReleaseFilter(oidn_filter_);
+    oidn_filter_ = nullptr;
+  }
+  if (oidn_device_) {
+    oidnReleaseDevice(oidn_device_);
+    oidn_device_ = nullptr;
+  }
+
+  switch (denoiser_device_->info.type) {
+#  if defined(OIDN_DEVICE_SYCL)
+    case DEVICE_ONEAPI:
+      oidn_device_ = oidnNewDevice(OIDN_DEVICE_TYPE_SYCL);
+      denoiser_queue_->init_execution();
+      break;
+#  endif
+    default:
+      break;
+  }
+  if (!oidn_device_) {
+    denoiser_device_->set_error("Failed to create OIDN device");
+    return false;
+  }
+
+  oidnCommitDevice(oidn_device_);
+
+  oidn_filter_ = create_filter();
+  if (oidn_filter_ == nullptr) {
+    return false;
+  }
+
+  oidnSetFilterBool(oidn_filter_, "hdr", true);
+  oidnSetFilterBool(oidn_filter_, "srgb", false);
+  oidnSetFilterInt(oidn_filter_, "maxMemoryMB", max_mem_);
+  if (params_.prefilter == DENOISER_PREFILTER_NONE ||
+      params_.prefilter == DENOISER_PREFILTER_ACCURATE)
+  {
+    oidnSetFilterBool(oidn_filter_, "cleanAux", true);
+  }
+
+  /* The prefilters run with their own, smaller memory budget. */
+  if (context.use_pass_albedo) {
+    albedo_filter_ = create_filter();
+    if (albedo_filter_ == nullptr) {
+      return false;
+    }
+    oidnSetFilterInt(albedo_filter_, "maxMemoryMB", prefilter_max_mem);
+  }
+
+  if (context.use_pass_normal) {
+    normal_filter_ = create_filter();
+    if (normal_filter_ == nullptr) {
+      return false;
+    }
+    oidnSetFilterInt(normal_filter_, "maxMemoryMB", prefilter_max_mem);
+  }
+
+  /* OIDN denoiser handle was created with the requested number of input passes. */
+  use_pass_albedo_ = context.use_pass_albedo;
+  use_pass_normal_ = context.use_pass_normal;
+
+  /* OIDN denoiser has been created, but it needs configuration. */
+  is_configured_ = false;
+  return true;
+}
+
+bool OIDNDenoiserGPU::denoise_configure_if_needed(DenoiseContext &context)
+{
+  /* Limit maximum tile size denoiser can be invoked with. */
+  const int2 size = make_int2(context.buffer_params.width, context.buffer_params.height);
+
+  if (is_configured_ && (configured_size_.x == size.x && configured_size_.y == size.y)) {
+    return true;
+  }
+
+  is_configured_ = true;
+  configured_size_ = size;
+
+  return true;
+}
+
+bool OIDNDenoiserGPU::denoise_run(const DenoiseContext &context, const DenoisePass &pass)
+{
+  /* Color pass. */
+  const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float);
+
+  oidnSetSharedFilterImage(oidn_filter_,
+                           "color",
+                           (void *)context.render_buffers->buffer.device_pointer,
+                           OIDN_FORMAT_FLOAT3,
+                           context.buffer_params.width,
+                           context.buffer_params.height,
+                           pass.denoised_offset * sizeof(float),
+                           pass_stride_in_bytes,
+                           pass_stride_in_bytes * context.buffer_params.stride);
+  oidnSetSharedFilterImage(oidn_filter_,
+                           "output",
+                           (void *)context.render_buffers->buffer.device_pointer,
+                           OIDN_FORMAT_FLOAT3,
+                           context.buffer_params.width,
+                           context.buffer_params.height,
+                           pass.denoised_offset * sizeof(float),
+                           pass_stride_in_bytes,
+                           pass_stride_in_bytes * context.buffer_params.stride);
+
+  /* Optional albedo and normal passes. */
+  if (context.num_input_passes > 1) {
+    const device_ptr d_guiding_buffer = context.guiding_params.device_pointer;
+    const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float);
+    const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes;
+
+    if (context.use_pass_albedo) {
+      if (params_.prefilter == DENOISER_PREFILTER_NONE) {
+        oidnSetSharedFilterImage(oidn_filter_,
+                                 "albedo",
+                                 (void *)d_guiding_buffer,
+                                 OIDN_FORMAT_FLOAT3,
+                                 context.buffer_params.width,
+                                 context.buffer_params.height,
+                                 context.guiding_params.pass_albedo * sizeof(float),
+                                 pixel_stride_in_bytes,
+                                 row_stride_in_bytes);
+      }
+      else {
+        oidnSetSharedFilterImage(albedo_filter_,
+                                 "color",
+                                 (void *)d_guiding_buffer,
+                                 OIDN_FORMAT_FLOAT3,
+                                 context.buffer_params.width,
+                                 context.buffer_params.height,
+                                 context.guiding_params.pass_albedo * sizeof(float),
+                                 pixel_stride_in_bytes,
+                                 row_stride_in_bytes);
+        oidnSetSharedFilterImage(albedo_filter_,
+                                 "output",
+                                 (void *)d_guiding_buffer,
+                                 OIDN_FORMAT_FLOAT3,
+                                 context.buffer_params.width,
+                                 context.buffer_params.height,
+                                 context.guiding_params.pass_albedo * sizeof(float),
+                                 pixel_stride_in_bytes,
+                                 row_stride_in_bytes);
+        oidnCommitFilter(albedo_filter_);
+        oidnExecuteFilterAsync(albedo_filter_);
+
+        oidnSetSharedFilterImage(oidn_filter_,
+                                 "albedo",
+                                 (void *)d_guiding_buffer,
+                                 OIDN_FORMAT_FLOAT3,
+                                 context.buffer_params.width,
+                                 context.buffer_params.height,
+                                 context.guiding_params.pass_albedo * sizeof(float),
+                                 pixel_stride_in_bytes,
+                                 row_stride_in_bytes);
+      }
+    }
+
+    if (context.use_pass_normal) {
+      if (params_.prefilter == DENOISER_PREFILTER_NONE) {
+        oidnSetSharedFilterImage(oidn_filter_,
+                                 "normal",
+                                 (void *)d_guiding_buffer,
+                                 OIDN_FORMAT_FLOAT3,
+                                 context.buffer_params.width,
+                                 context.buffer_params.height,
+                                 context.guiding_params.pass_normal * sizeof(float),
+                                 pixel_stride_in_bytes,
+                                 row_stride_in_bytes);
+      }
+      else {
+        oidnSetSharedFilterImage(normal_filter_,
+                                 "color",
+                                 (void *)d_guiding_buffer,
+                                 OIDN_FORMAT_FLOAT3,
+                                 context.buffer_params.width,
+                                 context.buffer_params.height,
+                                 context.guiding_params.pass_normal * sizeof(float),
+                                 pixel_stride_in_bytes,
+                                 row_stride_in_bytes);
+
+        oidnSetSharedFilterImage(normal_filter_,
+                                 "output",
+                                 (void *)d_guiding_buffer,
+                                 OIDN_FORMAT_FLOAT3,
+                                 context.buffer_params.width,
+                                 context.buffer_params.height,
+                                 context.guiding_params.pass_normal * sizeof(float),
+                                 pixel_stride_in_bytes,
+                                 row_stride_in_bytes);
+
+        oidnCommitFilter(normal_filter_);
+        oidnExecuteFilterAsync(normal_filter_);
+
+        oidnSetSharedFilterImage(oidn_filter_,
+                                 "normal",
+                                 (void *)d_guiding_buffer,
+                                 OIDN_FORMAT_FLOAT3,
+                                 context.buffer_params.width,
+                                 context.buffer_params.height,
+                                 context.guiding_params.pass_normal * sizeof(float),
+                                 pixel_stride_in_bytes,
+                                 row_stride_in_bytes);
+      }
+    }
+  }
+
+  oidnCommitFilter(oidn_filter_);
+  oidnExecuteFilter(oidn_filter_);
+
+  const char *out_message = nullptr;
+  OIDNError err = oidnGetDeviceError(oidn_device_, &out_message);
+
+  /* If OIDN runs out of memory, halve the memory limit and retry. The lowered limit is kept in
+   * `max_mem_` so subsequent runs start from a value known to fit. */
+  while (err == OIDN_ERROR_OUT_OF_MEMORY && max_mem_ > 200) {
+    max_mem_ = max_mem_ / 2;
+    oidnSetFilterInt(oidn_filter_, "maxMemoryMB", max_mem_);
+    oidnCommitFilter(oidn_filter_);
+    oidnExecuteFilter(oidn_filter_);
+    err = oidnGetDeviceError(oidn_device_, &out_message);
+  }
+
+  /* Only report failure when the last attempt actually failed: a retry which succeeded with a
+   * lower memory limit is a successful denoise. */
+  if (err != OIDN_ERROR_NONE) {
+    if (out_message) {
+      LOG(ERROR) << "OIDN error: " << out_message;
+      denoiser_device_->set_error(out_message);
+    }
+    else {
+      LOG(ERROR) << "OIDN error: unspecified";
+      denoiser_device_->set_error("Unspecified OIDN error");
+    }
+    return false;
+  }
+  return true;
+}
+
+CCL_NAMESPACE_END
+
+#endif
diff --git a/intern/cycles/integrator/denoiser_oidn_gpu.h b/intern/cycles/integrator/denoiser_oidn_gpu.h
new file mode 100644
index 00000000000..bd815140956
--- /dev/null
+++ b/intern/cycles/integrator/denoiser_oidn_gpu.h
@@ -0,0 +1,81 @@
+/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
+ *
+ * SPDX-License-Identifier: Apache-2.0 */
+
+#pragma once
+
+#if defined(WITH_OPENIMAGEDENOISE)
+
+#  include "integrator/denoiser_gpu.h"
+#  include "util/thread.h"
+#  include "util/unique_ptr.h"
+
+typedef struct OIDNDeviceImpl *OIDNDevice;
+typedef struct OIDNFilterImpl *OIDNFilter;
+typedef struct OIDNBufferImpl *OIDNBuffer;
+
+CCL_NAMESPACE_BEGIN
+
+/* Implementation of a GPU denoiser which uses OpenImageDenoise library. */
+class OIDNDenoiserGPU : public DenoiserGPU {
+  friend class OIDNDenoiseContext;
+
+ public:
+  /* Forwardly declared state which might be using compile-flag specific fields, such as
+   * OpenImageDenoise device and filter handles. */
+  class State;
+
+  OIDNDenoiserGPU(Device *path_trace_device, const DenoiseParams &params);
+  ~OIDNDenoiserGPU();
+
+  virtual bool denoise_buffer(const BufferParams &buffer_params,
+                              RenderBuffers *render_buffers,
+                              const int num_samples,
+                              bool allow_inplace_modification) override;
+
+  static bool is_device_supported(const DeviceInfo &device);
+  static bool is_device_type_supported(const DeviceType &type);
+
+ protected:
+  virtual uint get_device_type_mask() const override;
+
+  /* We only perform one denoising at a time, since OpenImageDenoise itself is multithreaded.
+   * Use this mutex whenever images are passed to the OIDN and needs to be denoised. */
+  static thread_mutex mutex_;
+
+  /* Create OIDN denoiser descriptor if needed.
+   * Will do nothing if the current OIDN descriptor is usable for the given parameters.
+   * If the OIDN denoiser descriptor did re-allocate here it is left unconfigured. */
+  virtual bool denoise_create_if_needed(DenoiseContext &context) override;
+
+  /* Configure existing OIDN denoiser descriptor for the use for the given task. */
+  virtual bool denoise_configure_if_needed(DenoiseContext &context) override;
+
+  /* Run configured denoiser. */
+  virtual bool denoise_run(const DenoiseContext &context, const DenoisePass &pass) override;
+
+  /* Create an "RT" filter on `oidn_device_`. Returns null on failure, after reporting the OIDN
+   * device error (if any) on the denoiser device. */
+  OIDNFilter create_filter();
+
+  OIDNDevice oidn_device_ = nullptr;
+  /* Main filter, plus filters used to denoise the albedo and normal guiding passes in-place
+   * before the main filter runs (used when the prefilter is not `DENOISER_PREFILTER_NONE`). */
+  OIDNFilter oidn_filter_ = nullptr;
+  OIDNFilter albedo_filter_ = nullptr;
+  OIDNFilter normal_filter_ = nullptr;
+
+  bool is_configured_ = false;
+  int2 configured_size_ = make_int2(0, 0);
+
+  bool use_pass_albedo_ = false;
+  bool use_pass_normal_ = false;
+
+  /* "maxMemoryMB" limit passed to the main filter. Halved by `denoise_run()` whenever OIDN
+   * reports running out of memory. */
+  int max_mem_ = 3000;
+};
+
+CCL_NAMESPACE_END
+
+#endif
diff --git a/intern/cycles/kernel/device/oneapi/kernel.cpp b/intern/cycles/kernel/device/oneapi/kernel.cpp
index 880f03095e6..51c370c0a93 100644
--- a/intern/cycles/kernel/device/oneapi/kernel.cpp
+++ b/intern/cycles/kernel/device/oneapi/kernel.cpp
@@ -137,6 +137,10 @@ bool oneapi_run_test_kernel(SyclQueue *queue_)
 bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
                                             const uint kernel_features)
 {
+  /* Skip all non-Cycles kernels. */
+  if (kernel_name.find("oneapi_kernel_") == std::string::npos) {
+    return false;
+  }
   if ((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0 &&
       kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) !=
           std::string::npos)