/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation * * SPDX-License-Identifier: Apache-2.0 */ #if defined(WITH_OPENIMAGEDENOISE) # include "integrator/denoiser_oidn_gpu.h" # include "device/device.h" # include "device/oneapi/device_impl.h" # include "device/queue.h" # include "session/buffers.h" # include "util/log.h" # include "util/path.h" # if OIDN_VERSION_MAJOR < 2 # define oidnSetFilterBool oidnSetFilter1b # define oidnSetFilterInt oidnSetFilter1i # define oidnExecuteFilterAsync oidnExecuteFilter # endif CCL_NAMESPACE_BEGIN # if OIDN_VERSION < 20300 static const char *oidn_device_type_to_string(const OIDNDeviceType type) { switch (type) { case OIDN_DEVICE_TYPE_DEFAULT: return "DEFAULT"; case OIDN_DEVICE_TYPE_CPU: return "CPU"; /* The initial GPU support was added in OIDN 2.0. */ # if OIDN_VERSION_MAJOR >= 2 case OIDN_DEVICE_TYPE_SYCL: return "SYCL"; case OIDN_DEVICE_TYPE_CUDA: return "CUDA"; case OIDN_DEVICE_TYPE_HIP: return "HIP"; # endif /* The Metal support was added in OIDN 2.2. */ # if (OIDN_VERSION_MAJOR > 2) || ((OIDN_VERSION_MAJOR == 2) && (OIDN_VERSION_MINOR >= 2)) case OIDN_DEVICE_TYPE_METAL: return "METAL"; # endif } return "UNKNOWN"; } # endif bool OIDNDenoiserGPU::is_device_supported(const DeviceInfo &device) { # if OIDN_VERSION >= 20300 if (device.type == DEVICE_MULTI) { for (const DeviceInfo &multi_device : device.multi_devices) { if (multi_device.type != DEVICE_CPU && multi_device.denoisers & DENOISER_OPENIMAGEDENOISE) { return true; } } return false; } return device.denoisers & DENOISER_OPENIMAGEDENOISE; # else if (device.type == DEVICE_MULTI) { for (const DeviceInfo &multi_device : device.multi_devices) { if (multi_device.type != DEVICE_CPU && is_device_supported(multi_device)) { return true; } } return false; } VLOG_DEBUG << "Checking device " << device.description << " (" << device.id << ") for OIDN GPU support"; int device_type = OIDN_DEVICE_TYPE_DEFAULT; switch (device.type) { # ifdef OIDN_DEVICE_SYCL case DEVICE_ONEAPI: device_type = OIDN_DEVICE_TYPE_SYCL; break; # endif # ifdef OIDN_DEVICE_HIP case DEVICE_HIP: device_type = OIDN_DEVICE_TYPE_HIP; break; # endif # ifdef OIDN_DEVICE_CUDA case DEVICE_CUDA: case DEVICE_OPTIX: device_type = OIDN_DEVICE_TYPE_CUDA; break; # endif # ifdef OIDN_DEVICE_METAL case DEVICE_METAL: { const int num_devices = oidnGetNumPhysicalDevices(); VLOG_DEBUG << "Found " << num_devices << " OIDN device(s)"; for (int i = 0; i < num_devices; i++) { const int type = oidnGetPhysicalDeviceInt(i, "type"); const char *name = oidnGetPhysicalDeviceString(i, "name"); VLOG_DEBUG << "OIDN device " << i << ": name=\"" << name << "\", type=" << oidn_device_type_to_string(OIDNDeviceType(type)); if (type == OIDN_DEVICE_TYPE_METAL) { if (device.id.find(name) != std::string::npos) { VLOG_DEBUG << "OIDN device name matches the Cycles device name"; return true; } } } VLOG_DEBUG << "No matched OIDN device found"; return false; } # endif case DEVICE_CPU: /* This is the GPU denoiser - CPU devices shouldn't end up here. */ assert(0); default: return false; } /* Match GPUs by their PCI ID. */ const int num_devices = oidnGetNumPhysicalDevices(); VLOG_DEBUG << "Found " << num_devices << " OIDN device(s)"; for (int i = 0; i < num_devices; i++) { const int type = oidnGetPhysicalDeviceInt(i, "type"); const char *name = oidnGetPhysicalDeviceString(i, "name"); VLOG_DEBUG << "OIDN device " << i << ": name=\"" << name << "\" type=" << oidn_device_type_to_string(OIDNDeviceType(type)); if (type == device_type) { if (oidnGetPhysicalDeviceBool(i, "pciAddressSupported")) { unsigned int pci_domain = oidnGetPhysicalDeviceInt(i, "pciDomain"); unsigned int pci_bus = oidnGetPhysicalDeviceInt(i, "pciBus"); unsigned int pci_device = oidnGetPhysicalDeviceInt(i, "pciDevice"); string pci_id = string_printf("%04x:%02x:%02x", pci_domain, pci_bus, pci_device); VLOG_INFO << "OIDN device PCI-e identifier: " << pci_id; if (device.id.find(pci_id) != string::npos) { VLOG_DEBUG << "OIDN device PCI-e identifier matches the Cycles device ID"; return true; } } else { VLOG_DEBUG << "Device does not support pciAddressSupported"; } } } VLOG_DEBUG << "No matched OIDN device found"; return false; # endif } OIDNDenoiserGPU::OIDNDenoiserGPU(Device *denoiser_device, const DenoiseParams ¶ms) : DenoiserGPU(denoiser_device, params) { DCHECK_EQ(params.type, DENOISER_OPENIMAGEDENOISE); } OIDNDenoiserGPU::~OIDNDenoiserGPU() { release_all_resources(); } bool OIDNDenoiserGPU::denoise_buffer(const BufferParams &buffer_params, RenderBuffers *render_buffers, const int num_samples, bool allow_inplace_modification) { return DenoiserGPU::denoise_buffer( buffer_params, render_buffers, num_samples, allow_inplace_modification); } uint OIDNDenoiserGPU::get_device_type_mask() const { uint device_mask = 0; # ifdef OIDN_DEVICE_SYCL device_mask |= DEVICE_MASK_ONEAPI; # endif # ifdef OIDN_DEVICE_METAL device_mask |= DEVICE_MASK_METAL; # endif # ifdef OIDN_DEVICE_CUDA device_mask |= DEVICE_MASK_CUDA; device_mask |= DEVICE_MASK_OPTIX; # endif # ifdef OIDN_DEVICE_HIP device_mask |= DEVICE_MASK_HIP; # endif return device_mask; } OIDNFilter OIDNDenoiserGPU::create_filter() { const char *error_message = nullptr; OIDNFilter filter = oidnNewFilter(oidn_device_, "RT"); if (filter == nullptr) { const OIDNError err = oidnGetDeviceError(oidn_device_, &error_message); if (OIDN_ERROR_NONE != err) { LOG(ERROR) << "OIDN error: " << error_message; set_error(error_message); } } # if OIDN_VERSION_MAJOR >= 2 switch (quality_) { case DENOISER_QUALITY_FAST: # if OIDN_VERSION >= 20300 oidnSetFilterInt(filter, "quality", OIDN_QUALITY_FAST); break; # endif case DENOISER_QUALITY_BALANCED: oidnSetFilterInt(filter, "quality", OIDN_QUALITY_BALANCED); break; case DENOISER_QUALITY_HIGH: default: oidnSetFilterInt(filter, "quality", OIDN_QUALITY_HIGH); } # endif return filter; } bool OIDNDenoiserGPU::commit_and_execute_filter(OIDNFilter filter, ExecMode mode) { const char *error_message = nullptr; OIDNError err = OIDN_ERROR_NONE; for (;;) { oidnCommitFilter(filter); if (mode == ExecMode::ASYNC) { oidnExecuteFilterAsync(filter); } else { oidnExecuteFilter(filter); } /* If OIDN runs out of memory, reduce mem limit and retry */ err = oidnGetDeviceError(oidn_device_, &error_message); if (err != OIDN_ERROR_OUT_OF_MEMORY || max_mem_ < 200) { break; } max_mem_ = max_mem_ / 2; oidnSetFilterInt(filter, "maxMemoryMB", max_mem_); } if (err != OIDN_ERROR_NONE) { if (error_message == nullptr) { error_message = "Unspecified OIDN error"; } LOG(ERROR) << "OIDN error: " << error_message; set_error(error_message); return false; } return true; } bool OIDNDenoiserGPU::denoise_create_if_needed(DenoiseContext &context) { const bool recreate_denoiser = (oidn_device_ == nullptr) || (oidn_filter_ == nullptr) || (use_pass_albedo_ != context.use_pass_albedo) || (use_pass_normal_ != context.use_pass_normal) || (quality_ != params_.quality); if (!recreate_denoiser) { return true; } /* Destroy existing handles before creating new ones. */ release_all_resources(); switch (denoiser_device_->info.type) { # if defined(OIDN_DEVICE_SYCL) && defined(WITH_ONEAPI) case DEVICE_ONEAPI: oidn_device_ = oidnNewSYCLDevice( (const sycl::queue *)reinterpret_cast(denoiser_device_)->sycl_queue(), 1); break; # endif # if defined(OIDN_DEVICE_METAL) && defined(WITH_METAL) case DEVICE_METAL: { denoiser_queue_->init_execution(); const MTLCommandQueue_id queue = (const MTLCommandQueue_id)denoiser_queue_->native_queue(); oidn_device_ = oidnNewMetalDevice(&queue, 1); } break; # endif # if defined(OIDN_DEVICE_CUDA) && defined(WITH_CUDA) case DEVICE_CUDA: case DEVICE_OPTIX: { /* Directly using the stream from the DeviceQueue returns "invalid resource handle". */ cudaStream_t stream = nullptr; oidn_device_ = oidnNewCUDADevice(&denoiser_device_->info.num, &stream, 1); break; } # endif # if defined(OIDN_DEVICE_HIP) && defined(WITH_HIP) case DEVICE_HIP: { hipStream_t stream = nullptr; oidn_device_ = oidnNewHIPDevice(&denoiser_device_->info.num, &stream, 1); break; } # endif default: break; } if (!oidn_device_) { set_error("Failed to create OIDN device"); return false; } if (denoiser_queue_) { denoiser_queue_->init_execution(); } oidnCommitDevice(oidn_device_); quality_ = params_.quality; oidn_filter_ = create_filter(); if (oidn_filter_ == nullptr) { return false; } oidnSetFilterBool(oidn_filter_, "hdr", true); oidnSetFilterBool(oidn_filter_, "srgb", false); const char *custom_weight_path = getenv("CYCLES_OIDN_CUSTOM_WEIGHTS"); if (custom_weight_path) { if (path_read_binary(custom_weight_path, custom_weights)) { oidnSetSharedFilterData( oidn_filter_, "weights", custom_weights.data(), custom_weights.size()); } else { fprintf(stderr, "Cycles: Failed to load custom OIDN weights!"); } } if (context.use_pass_albedo) { albedo_filter_ = create_filter(); if (albedo_filter_ == nullptr) { return false; } } if (context.use_pass_normal) { normal_filter_ = create_filter(); if (normal_filter_ == nullptr) { return false; } } /* OIDN denoiser handle was created with the requested number of input passes. */ use_pass_albedo_ = context.use_pass_albedo; use_pass_normal_ = context.use_pass_normal; /* OIDN denoiser has been created, but it needs configuration. */ is_configured_ = false; return true; } bool OIDNDenoiserGPU::denoise_configure_if_needed(DenoiseContext &context) { /* Limit maximum tile size denoiser can be invoked with. */ const int2 size = make_int2(context.buffer_params.width, context.buffer_params.height); if (is_configured_ && (configured_size_.x == size.x && configured_size_.y == size.y)) { return true; } is_configured_ = true; configured_size_ = size; return true; } bool OIDNDenoiserGPU::denoise_run(const DenoiseContext &context, const DenoisePass &pass) { /* Color pass. */ const int64_t pass_stride_in_bytes = context.buffer_params.pass_stride * sizeof(float); set_filter_pass(oidn_filter_, "color", context.render_buffers->buffer.device_pointer, OIDN_FORMAT_FLOAT3, context.buffer_params.width, context.buffer_params.height, pass.denoised_offset * sizeof(float), pass_stride_in_bytes, pass_stride_in_bytes * context.buffer_params.stride); set_filter_pass(oidn_filter_, "output", context.render_buffers->buffer.device_pointer, OIDN_FORMAT_FLOAT3, context.buffer_params.width, context.buffer_params.height, pass.denoised_offset * sizeof(float), pass_stride_in_bytes, pass_stride_in_bytes * context.buffer_params.stride); /* Optional albedo and color passes. */ if (context.num_input_passes > 1) { const device_ptr d_guiding_buffer = context.guiding_params.device_pointer; const int64_t pixel_stride_in_bytes = context.guiding_params.pass_stride * sizeof(float); const int64_t row_stride_in_bytes = context.guiding_params.stride * pixel_stride_in_bytes; if (context.use_pass_albedo) { set_filter_pass(oidn_filter_, "albedo", d_guiding_buffer, OIDN_FORMAT_FLOAT3, context.buffer_params.width, context.buffer_params.height, context.guiding_params.pass_albedo * sizeof(float), pixel_stride_in_bytes, row_stride_in_bytes); if (params_.prefilter == DENOISER_PREFILTER_ACCURATE) { set_filter_pass(albedo_filter_, "albedo", d_guiding_buffer, OIDN_FORMAT_FLOAT3, context.buffer_params.width, context.buffer_params.height, context.guiding_params.pass_albedo * sizeof(float), pixel_stride_in_bytes, row_stride_in_bytes); set_filter_pass(albedo_filter_, "output", d_guiding_buffer, OIDN_FORMAT_FLOAT3, context.buffer_params.width, context.buffer_params.height, context.guiding_params.pass_albedo * sizeof(float), pixel_stride_in_bytes, row_stride_in_bytes); if (!commit_and_execute_filter(albedo_filter_, ExecMode::ASYNC)) { return false; } } } if (context.use_pass_normal) { set_filter_pass(oidn_filter_, "normal", d_guiding_buffer, OIDN_FORMAT_FLOAT3, context.buffer_params.width, context.buffer_params.height, context.guiding_params.pass_normal * sizeof(float), pixel_stride_in_bytes, row_stride_in_bytes); if (params_.prefilter == DENOISER_PREFILTER_ACCURATE) { set_filter_pass(normal_filter_, "normal", d_guiding_buffer, OIDN_FORMAT_FLOAT3, context.buffer_params.width, context.buffer_params.height, context.guiding_params.pass_normal * sizeof(float), pixel_stride_in_bytes, row_stride_in_bytes); set_filter_pass(normal_filter_, "output", d_guiding_buffer, OIDN_FORMAT_FLOAT3, context.buffer_params.width, context.buffer_params.height, context.guiding_params.pass_normal * sizeof(float), pixel_stride_in_bytes, row_stride_in_bytes); if (!commit_and_execute_filter(normal_filter_, ExecMode::ASYNC)) { return false; } } } } oidnSetFilterInt(oidn_filter_, "cleanAux", params_.prefilter != DENOISER_PREFILTER_FAST); return commit_and_execute_filter(oidn_filter_); } void OIDNDenoiserGPU::set_filter_pass(OIDNFilter filter, const char *name, device_ptr ptr, const int format, const int width, const int height, const size_t offset_in_bytes, const size_t pixel_stride_in_bytes, const size_t row_stride_in_bytes) { # if defined(OIDN_DEVICE_METAL) && defined(WITH_METAL) if (denoiser_device_->info.type == DEVICE_METAL) { void *mtl_buffer = denoiser_device_->get_native_buffer(ptr); OIDNBuffer oidn_buffer = oidnNewSharedBufferFromMetal(oidn_device_, mtl_buffer); oidnSetFilterImage(filter, name, oidn_buffer, (OIDNFormat)format, width, height, offset_in_bytes, pixel_stride_in_bytes, row_stride_in_bytes); oidnReleaseBuffer(oidn_buffer); } else # endif { oidnSetSharedFilterImage(filter, name, (void *)ptr, (OIDNFormat)format, width, height, offset_in_bytes, pixel_stride_in_bytes, row_stride_in_bytes); } } void OIDNDenoiserGPU::release_all_resources() { if (albedo_filter_) { oidnReleaseFilter(albedo_filter_); albedo_filter_ = nullptr; } if (normal_filter_) { oidnReleaseFilter(normal_filter_); normal_filter_ = nullptr; } if (oidn_filter_) { oidnReleaseFilter(oidn_filter_); oidn_filter_ = nullptr; } if (oidn_device_) { oidnReleaseDevice(oidn_device_); oidn_device_ = nullptr; } } CCL_NAMESPACE_END #endif