/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
 *
 * SPDX-License-Identifier: Apache-2.0 */

#include "integrator/denoiser_gpu.h"

#include "device/denoise.h"
#include "device/device.h"
#include "device/memory.h"
#include "device/queue.h"
#include "integrator/pass_accessor_gpu.h"
#include "session/buffers.h"
#include "util/log.h"
#include "util/progress.h"

CCL_NAMESPACE_BEGIN

/* Construct the GPU denoiser and create the queue on the denoiser device which is used for all
 * kernel launches and memory transfers issued from this file. */
DenoiserGPU::DenoiserGPU(Device *denoiser_device, const DenoiseParams &params)
    : Denoiser(denoiser_device, params)
{
  denoiser_queue_ = denoiser_device->gpu_queue_create();
  DCHECK(denoiser_queue_);
}

DenoiserGPU::~DenoiserGPU()
{
  /* Explicit implementation, to allow forward declaration of Device in the header. */
}

/* Denoise the given render buffers.
 *
 * When the render buffers belong to the denoiser device they are used directly. Otherwise a
 * temporary buffer is created on the denoiser device, filled with a host-side copy of the input,
 * denoised, and the denoised result is copied back into `render_buffers`.
 *
 * Returns false when no denoiser device is available, otherwise the result of the task-based
 * `denoise_buffer()` overload. */
bool DenoiserGPU::denoise_buffer(const BufferParams &buffer_params,
                                 RenderBuffers *render_buffers,
                                 const int num_samples,
                                 bool allow_inplace_modification)
{
  Device *denoiser_device = get_denoiser_device();
  if (!denoiser_device) {
    return false;
  }

  DenoiseTask task;
  task.params = params_;
  task.num_samples = num_samples;
  task.buffer_params = buffer_params;
  task.allow_inplace_modification = allow_inplace_modification;

  RenderBuffers local_render_buffers(denoiser_device);
  bool local_buffer_used = false;

  if (denoiser_device == render_buffers->buffer.device) {
    /* The device can access an existing buffer pointer. */
    local_buffer_used = false;
    task.render_buffers = render_buffers;
  }
  else {
    VLOG_WORK << "Creating temporary buffer on denoiser device.";

    /* Create buffer which is available by the device used by denoiser. */

    /* TODO(sergey): Optimize data transfers. For example, only copy denoising related passes,
     * ignoring other light and data passes. */

    local_buffer_used = true;

    render_buffers->copy_from_device();

    local_render_buffers.reset(buffer_params);

    /* NOTE: The local buffer is allocated for an exact size of the effective render size, while
     * the input render buffer is allocated for the lowest resolution divider possible. So it is
     * important to only copy actually needed part of the input buffer. */
    memcpy(local_render_buffers.buffer.data(),
           render_buffers->buffer.data(),
           sizeof(float) * local_render_buffers.buffer.size());

    denoiser_queue_->copy_to_device(local_render_buffers.buffer);

    task.render_buffers = &local_render_buffers;
    /* The temporary buffer is private to this call, so the task is allowed to modify it
     * in-place regardless of what the caller requested for the original buffer. */
    task.allow_inplace_modification = true;
  }

  const bool denoise_result = denoise_buffer(task);

  if (local_buffer_used) {
    /* Bring the result back to the host and transfer the denoised passes into the caller's
     * buffer. NOTE: this is done regardless of the value of `denoise_result`. */
    local_render_buffers.copy_from_device();

    render_buffers_host_copy_denoised(
        render_buffers, buffer_params, &local_render_buffers, local_render_buffers.params);

    render_buffers->copy_to_device();
  }

  return denoise_result;
}

/* Run denoising of all supported passes for the given task.
 *
 * Returns false when the denoiser could not be created/configured or when the guiding passes
 * preprocessing kernel could not be enqueued. Failures of individual `denoise_pass()` calls are
 * logged by that function and are not reflected in the return value. */
bool DenoiserGPU::denoise_buffer(const DenoiseTask &task)
{
  DenoiseContext context(denoiser_device_, task);

  if (!denoise_ensure(context)) {
    return false;
  }

  if (!denoise_filter_guiding_preprocess(context)) {
    LOG(ERROR) << "Error preprocessing guiding passes.";
    return false;
  }

  /* Passes which will use real albedo when it is available. */
  denoise_pass(context, PASS_COMBINED);
  denoise_pass(context, PASS_SHADOW_CATCHER_MATTE);

  /* Passes which do not need albedo and hence if real is present it needs to become fake. */
  denoise_pass(context, PASS_SHADOW_CATCHER);

  return true;
}

/* Make sure the denoiser is created and configured for the given context.
 * Logs an error and returns false if either step fails. */
bool DenoiserGPU::denoise_ensure(DenoiseContext &context)
{
  if (!denoise_create_if_needed(context)) {
    LOG(ERROR) << "GPU denoiser creation has failed.";
    return false;
  }

  if (!denoise_configure_if_needed(context)) {
    LOG(ERROR) << "GPU denoiser configuration has failed.";
    return false;
  }

  return true;
}

/* Enqueue the DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS kernel with one work item per pixel of the
 * buffer (width * height). The kernel receives the guiding buffer layout, the render buffer
 * layout and the offsets of the passes it reads from.
 *
 * Returns the result of the queue's `enqueue()` call. */
bool DenoiserGPU::denoise_filter_guiding_preprocess(const DenoiseContext &context)
{
  const BufferParams &buffer_params = context.buffer_params;

  const int work_size = buffer_params.width * buffer_params.height;

  /* NOTE(review): the order of the arguments presumably has to match the kernel signature, which
   * is not visible from this file. Do not reorder. */
  DeviceKernelArguments args(&context.guiding_params.device_pointer,
                             &context.guiding_params.pass_stride,
                             &context.guiding_params.pass_albedo,
                             &context.guiding_params.pass_normal,
                             &context.guiding_params.pass_flow,
                             &context.render_buffers->buffer.device_pointer,
                             &buffer_params.offset,
                             &buffer_params.stride,
                             &buffer_params.pass_stride,
                             &context.pass_sample_count,
                             &context.pass_denoising_albedo,
                             &context.pass_denoising_normal,
                             &context.pass_motion,
                             &buffer_params.full_x,
                             &buffer_params.full_y,
                             &buffer_params.width,
                             &buffer_params.height,
                             &context.num_samples);

  return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
}

/* Gather everything needed to denoise a single task: which input passes are used, where they are
 * located in the render buffer, and where the guiding passes are stored.
 *
 * The guiding passes either alias the render buffer (when in-place modification is allowed) or
 * are stored in a dedicated, tightly packed buffer allocated on `device`. */
DenoiserGPU::DenoiseContext::DenoiseContext(Device *device, const DenoiseTask &task)
    : denoise_params(task.params),
      render_buffers(task.render_buffers),
      buffer_params(task.buffer_params),
      guiding_buffer(device, "denoiser guiding passes buffer", true),
      num_samples(task.num_samples)
{
  /* One input pass is always counted; every enabled guiding pass adds one more. */
  num_input_passes = 1;
  if (denoise_params.use_pass_albedo) {
    num_input_passes += 1;
    use_pass_albedo = true;
    pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
    /* The normal pass is only considered when the albedo pass is used as well. */
    if (denoise_params.use_pass_normal) {
      num_input_passes += 1;
      use_pass_normal = true;
      pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
    }
  }

  if (denoise_params.temporally_stable) {
    /* The previous output is read directly from the render buffer. */
    prev_output.device_pointer = render_buffers->buffer.device_pointer;

    prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);

    prev_output.stride = buffer_params.stride;
    prev_output.pass_stride = buffer_params.pass_stride;

    num_input_passes += 1;
    use_pass_motion = true;
    pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
  }

  use_guiding_passes = (num_input_passes - 1) > 0;

  if (use_guiding_passes) {
    if (task.allow_inplace_modification) {
      /* Guiding passes alias the render buffer: reuse its pointer, pass offsets and strides. */
      guiding_params.device_pointer = render_buffers->buffer.device_pointer;

      guiding_params.pass_albedo = pass_denoising_albedo;
      guiding_params.pass_normal = pass_denoising_normal;
      guiding_params.pass_flow = pass_motion;

      guiding_params.stride = buffer_params.stride;
      guiding_params.pass_stride = buffer_params.pass_stride;
    }
    else {
      /* Pack the used guiding passes one after another in a dedicated buffer. The running value
       * of `pass_stride` is the offset of the next pass: albedo and normal advance it by 3, flow
       * advances it by 2. */
      guiding_params.pass_stride = 0;
      if (use_pass_albedo) {
        guiding_params.pass_albedo = guiding_params.pass_stride;
        guiding_params.pass_stride += 3;
      }
      if (use_pass_normal) {
        guiding_params.pass_normal = guiding_params.pass_stride;
        guiding_params.pass_stride += 3;
      }
      if (use_pass_motion) {
        guiding_params.pass_flow = guiding_params.pass_stride;
        guiding_params.pass_stride += 2;
      }

      guiding_params.stride = buffer_params.width;

      guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
                                     guiding_params.pass_stride);
      guiding_params.device_pointer = guiding_buffer.device_pointer;
    }
  }

  pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
}

/* Enqueue the DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS kernel with one work item per pixel of the
 * buffer (width * height), passing the noisy and denoised offsets of the given pass.
 *
 * Returns the result of the queue's `enqueue()` call. */
bool DenoiserGPU::denoise_filter_color_postprocess(const DenoiseContext &context,
                                                   const DenoisePass &pass)
{
  const BufferParams &buffer_params = context.buffer_params;

  const int work_size = buffer_params.width * buffer_params.height;

  DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
                             &buffer_params.full_x,
                             &buffer_params.full_y,
                             &buffer_params.width,
                             &buffer_params.height,
                             &buffer_params.offset,
                             &buffer_params.stride,
                             &buffer_params.pass_stride,
                             &context.num_samples,
                             &pass.noisy_offset,
                             &pass.denoised_offset,
                             &context.pass_sample_count,
                             &pass.num_components,
                             &pass.use_compositing);

  return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
}

/* Enqueue the DEVICE_KERNEL_FILTER_COLOR_PREPROCESS kernel with one work item per pixel of the
 * buffer (width * height). The kernel is given the denoised offset of the pass, which is where
 * `denoise_color_read()` stores the color it has read.
 *
 * Returns the result of the queue's `enqueue()` call. */
bool DenoiserGPU::denoise_filter_color_preprocess(const DenoiseContext &context,
                                                  const DenoisePass &pass)
{
  const BufferParams &buffer_params = context.buffer_params;

  const int work_size = buffer_params.width * buffer_params.height;

  DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
                             &buffer_params.full_x,
                             &buffer_params.full_y,
                             &buffer_params.width,
                             &buffer_params.height,
                             &buffer_params.offset,
                             &buffer_params.stride,
                             &buffer_params.pass_stride,
                             &pass.denoised_offset);

  return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
}

/* Enqueue the DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO kernel with one work item per pixel
 * of the buffer (width * height), operating on the albedo pass of the guiding buffer.
 *
 * Returns the result of the queue's `enqueue()` call. */
bool DenoiserGPU::denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
{
  const BufferParams &buffer_params = context.buffer_params;

  const int work_size = buffer_params.width * buffer_params.height;

  DeviceKernelArguments args(&context.guiding_params.device_pointer,
                             &context.guiding_params.pass_stride,
                             &context.guiding_params.pass_albedo,
                             &buffer_params.width,
                             &buffer_params.height);

  return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
}

/* Read the noisy version of the given pass via a GPU pass accessor and store it as 3 components
 * per pixel at the denoised offset of the same pass in the render buffer. */
void DenoiserGPU::denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
{
  PassAccessor::PassAccessInfo pass_access_info;
  pass_access_info.type = pass.type;
  pass_access_info.mode = PassMode::NOISY;
  pass_access_info.offset = pass.noisy_offset;

  /* Denoiser operates on passes which are used to calculate the approximation, and is never used
   * on the approximation. The latter is not even possible because OptiX does not support
   * denoising of semi-transparent pixels. */
  pass_access_info.use_approximate_shadow_catcher = false;
  pass_access_info.use_approximate_shadow_catcher_background = false;
  pass_access_info.show_active_pixels = false;

  /* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
   */
  const PassAccessorGPU pass_accessor(
      denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);

  /* The destination is the render buffer itself, at the denoised offset of the pass. */
  PassAccessor::Destination destination(pass_access_info.type);
  destination.d_pixels = context.render_buffers->buffer.device_pointer;
  destination.num_components = 3;
  destination.pixel_offset = pass.denoised_offset;
  destination.pixel_stride = context.buffer_params.pass_stride;

  /* Use a copy of the buffer parameters whose window covers the entire buffer. */
  BufferParams buffer_params = context.buffer_params;
  buffer_params.window_x = 0;
  buffer_params.window_y = 0;
  buffer_params.window_width = buffer_params.width;
  buffer_params.window_height = buffer_params.height;

  pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
}

/* Denoise a single pass of the render buffer.
 *
 * Does nothing when the pass has no noisy data in the buffer. Every failure is logged and makes
 * the function return early. In that case the queue is not synchronized by this function.
 *
 * Passes which do not use the denoising albedo cause the albedo guiding pass to be replaced with
 * a fake one. This is tracked in the context, so a pass which does require albedo can not be
 * denoised afterwards. */
void DenoiserGPU::denoise_pass(DenoiseContext &context, PassType pass_type)
{
  const BufferParams &buffer_params = context.buffer_params;

  const DenoisePass pass(pass_type, buffer_params);

  if (pass.noisy_offset == PASS_UNUSED) {
    return;
  }
  if (pass.denoised_offset == PASS_UNUSED) {
    LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
    return;
  }

  if (pass.use_denoising_albedo) {
    if (context.albedo_replaced_with_fake) {
      LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
      return;
    }
  }
  else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
    /* The flag is set before the kernel is enqueued, so it stays set even if enqueueing fails. */
    context.albedo_replaced_with_fake = true;
    if (!denoise_filter_guiding_set_fake_albedo(context)) {
      LOG(ERROR) << "Error replacing real albedo with the fake one.";
      return;
    }
  }

  /* Read and preprocess noisy color input pass. */
  denoise_color_read(context, pass);
  if (!denoise_filter_color_preprocess(context, pass)) {
    LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
    return;
  }

  if (!denoise_run(context, pass)) {
    LOG(ERROR) << "Error running denoiser.";
    return;
  }

  /* Store result in the combined pass of the render buffer.
   *
   * This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
  if (!denoise_filter_color_postprocess(context, pass)) {
    LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
    return;
  }

  denoiser_queue_->synchronize();
}

CCL_NAMESPACE_END