Cycles: Refactored GPU denoising code

To prepare for OIDN2 with GPU support, some of the code that was exclusive to the OptiXDenoiser is being moved to the DenoiserGPU superclass.

Co-authored-by: Stefan Werner <stefan.werner@intel.com>
Pull Request: https://projects.blender.org/blender/blender/pulls/106496
This commit is contained in:
Stefan Werner
2023-04-05 11:19:15 +02:00
parent 9e9baa9085
commit a76bf65c9d
4 changed files with 314 additions and 310 deletions

View File

@@ -7,6 +7,7 @@
#include "device/device.h"
#include "device/memory.h"
#include "device/queue.h"
#include "integrator/pass_accessor_gpu.h"
#include "session/buffers.h"
#include "util/log.h"
#include "util/progress.h"
@@ -105,4 +106,220 @@ Device *DenoiserGPU::ensure_denoiser_device(Progress *progress)
return denoiser_device;
}
DenoiserGPU::DenoiseContext::DenoiseContext(Device *device, const DenoiseTask &task)
: denoise_params(task.params),
render_buffers(task.render_buffers),
buffer_params(task.buffer_params),
guiding_buffer(device, "denoiser guiding passes buffer", true),
num_samples(task.num_samples)
{
num_input_passes = 1;
if (denoise_params.use_pass_albedo) {
num_input_passes += 1;
use_pass_albedo = true;
pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
if (denoise_params.use_pass_normal) {
num_input_passes += 1;
use_pass_normal = true;
pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
}
}
if (denoise_params.temporally_stable) {
prev_output.device_pointer = render_buffers->buffer.device_pointer;
prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
prev_output.stride = buffer_params.stride;
prev_output.pass_stride = buffer_params.pass_stride;
num_input_passes += 1;
use_pass_motion = true;
pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
}
use_guiding_passes = (num_input_passes - 1) > 0;
if (use_guiding_passes) {
if (task.allow_inplace_modification) {
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
guiding_params.pass_albedo = pass_denoising_albedo;
guiding_params.pass_normal = pass_denoising_normal;
guiding_params.pass_flow = pass_motion;
guiding_params.stride = buffer_params.stride;
guiding_params.pass_stride = buffer_params.pass_stride;
}
else {
guiding_params.pass_stride = 0;
if (use_pass_albedo) {
guiding_params.pass_albedo = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
if (use_pass_normal) {
guiding_params.pass_normal = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
if (use_pass_motion) {
guiding_params.pass_flow = guiding_params.pass_stride;
guiding_params.pass_stride += 2;
}
guiding_params.stride = buffer_params.width;
guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
guiding_params.pass_stride);
guiding_params.device_pointer = guiding_buffer.device_pointer;
}
}
pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
}
bool DenoiserGPU::denoise_filter_color_postprocess(const DenoiseContext &context,
const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.num_samples,
&pass.noisy_offset,
&pass.denoised_offset,
&context.pass_sample_count,
&pass.num_components,
&pass.use_compositing);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
}
bool DenoiserGPU::denoise_filter_color_preprocess(const DenoiseContext &context,
const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&pass.denoised_offset);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
}
bool DenoiserGPU::denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.guiding_params.device_pointer,
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&buffer_params.width,
&buffer_params.height);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
}
void DenoiserGPU::denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
{
PassAccessor::PassAccessInfo pass_access_info;
pass_access_info.type = pass.type;
pass_access_info.mode = PassMode::NOISY;
pass_access_info.offset = pass.noisy_offset;
/* Denoiser operates on passes which are used to calculate the approximation, and is never used
* on the approximation. The latter is not even possible because OptiX does not support
* denoising of semi-transparent pixels. */
pass_access_info.use_approximate_shadow_catcher = false;
pass_access_info.use_approximate_shadow_catcher_background = false;
pass_access_info.show_active_pixels = false;
/* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
*/
const PassAccessorGPU pass_accessor(
denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
PassAccessor::Destination destination(pass_access_info.type);
destination.d_pixels = context.render_buffers->buffer.device_pointer +
pass.denoised_offset * sizeof(float);
destination.num_components = 3;
destination.pixel_stride = context.buffer_params.pass_stride;
BufferParams buffer_params = context.buffer_params;
buffer_params.window_x = 0;
buffer_params.window_y = 0;
buffer_params.window_width = buffer_params.width;
buffer_params.window_height = buffer_params.height;
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
}
void DenoiserGPU::denoise_pass(DenoiseContext &context, PassType pass_type)
{
const BufferParams &buffer_params = context.buffer_params;
const DenoisePass pass(pass_type, buffer_params);
if (pass.noisy_offset == PASS_UNUSED) {
return;
}
if (pass.denoised_offset == PASS_UNUSED) {
LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
return;
}
if (pass.use_denoising_albedo) {
if (context.albedo_replaced_with_fake) {
LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
return;
}
}
else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
context.albedo_replaced_with_fake = true;
if (!denoise_filter_guiding_set_fake_albedo(context)) {
LOG(ERROR) << "Error replacing real albedo with the fake one.";
return;
}
}
/* Read and preprocess noisy color input pass. */
denoise_color_read(context, pass);
if (!denoise_filter_color_preprocess(context, pass)) {
LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
return;
}
if (!denoise_run(context, pass)) {
LOG(ERROR) << "Error running denoiser.";
return;
}
/* Store result in the combined pass of the render buffer.
*
* This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
if (!denoise_filter_color_postprocess(context, pass)) {
LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
return;
}
denoiser_queue_->synchronize();
}
CCL_NAMESPACE_END

View File

@@ -21,6 +21,9 @@ class DenoiserGPU : public Denoiser {
bool allow_inplace_modification) override;
protected:
class DenoisePass;
class DenoiseContext;
/* All the parameters needed to perform buffer denoising on a device.
* Is not really a task in its canonical terms (as in, is not an asynchronous running task). Is
* more like a wrapper for all the arguments and parameters needed to perform denoising. Is a
@@ -41,12 +44,105 @@ class DenoiserGPU : public Denoiser {
bool allow_inplace_modification;
};
/* Read input color pass from the render buffer into the memory which corresponds to the noisy
* input within the given context. Pixels are scaled to the number of samples, but are not
* preprocessed yet. */
void denoise_color_read(const DenoiseContext &context, const DenoisePass &pass);
/* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
* denoiser result to the render buffer. */
bool denoise_filter_color_preprocess(const DenoiseContext &context, const DenoisePass &pass);
bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass);
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context);
void denoise_pass(DenoiseContext &context, PassType pass_type);
/* Returns true if task is fully handled. */
virtual bool denoise_buffer(const DenoiseTask & /*task*/) = 0;
virtual bool denoise_run(const DenoiseContext &context, const DenoisePass &pass) = 0;
virtual Device *ensure_denoiser_device(Progress *progress) override;
unique_ptr<DeviceQueue> denoiser_queue_;
class DenoisePass {
public:
DenoisePass(const PassType type, const BufferParams &buffer_params) : type(type)
{
noisy_offset = buffer_params.get_pass_offset(type, PassMode::NOISY);
denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
const PassInfo pass_info = Pass::get_info(type);
num_components = pass_info.num_components;
use_compositing = pass_info.use_compositing;
use_denoising_albedo = pass_info.use_denoising_albedo;
}
PassType type;
int noisy_offset;
int denoised_offset;
int num_components;
bool use_compositing;
bool use_denoising_albedo;
};
class DenoiseContext {
public:
explicit DenoiseContext(Device *device, const DenoiseTask &task);
const DenoiseParams &denoise_params;
RenderBuffers *render_buffers = nullptr;
const BufferParams &buffer_params;
/* Previous output. */
struct {
device_ptr device_pointer = 0;
int offset = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
} prev_output;
/* Device-side storage of the guiding passes. */
device_only_memory<float> guiding_buffer;
struct {
device_ptr device_pointer = 0;
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_albedo = PASS_UNUSED;
int pass_normal = PASS_UNUSED;
int pass_flow = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
} guiding_params;
/* Number of input passes. Including the color and extra auxiliary passes. */
int num_input_passes = 0;
bool use_guiding_passes = false;
bool use_pass_albedo = false;
bool use_pass_normal = false;
bool use_pass_motion = false;
int num_samples = 0;
int pass_sample_count = PASS_UNUSED;
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_denoising_albedo = PASS_UNUSED;
int pass_denoising_normal = PASS_UNUSED;
int pass_motion = PASS_UNUSED;
/* For passes which don't need albedo channel for denoising we replace the actual albedo with
* the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
* the fake values and denoising of passes which do need albedo can no longer happen. */
bool albedo_replaced_with_fake = false;
};
};
CCL_NAMESPACE_END

View File

@@ -218,154 +218,6 @@ uint OptiXDenoiser::get_device_type_mask() const
return DEVICE_MASK_OPTIX;
}
class OptiXDenoiser::DenoiseContext {
public:
explicit DenoiseContext(OptiXDevice *device, const DenoiseTask &task)
: denoise_params(task.params),
render_buffers(task.render_buffers),
buffer_params(task.buffer_params),
guiding_buffer(device, "denoiser guiding passes buffer", true),
num_samples(task.num_samples)
{
num_input_passes = 1;
if (denoise_params.use_pass_albedo) {
num_input_passes += 1;
use_pass_albedo = true;
pass_denoising_albedo = buffer_params.get_pass_offset(PASS_DENOISING_ALBEDO);
if (denoise_params.use_pass_normal) {
num_input_passes += 1;
use_pass_normal = true;
pass_denoising_normal = buffer_params.get_pass_offset(PASS_DENOISING_NORMAL);
}
}
if (denoise_params.temporally_stable) {
prev_output.device_pointer = render_buffers->buffer.device_pointer;
prev_output.offset = buffer_params.get_pass_offset(PASS_DENOISING_PREVIOUS);
prev_output.stride = buffer_params.stride;
prev_output.pass_stride = buffer_params.pass_stride;
num_input_passes += 1;
use_pass_motion = true;
pass_motion = buffer_params.get_pass_offset(PASS_MOTION);
}
use_guiding_passes = (num_input_passes - 1) > 0;
if (use_guiding_passes) {
if (task.allow_inplace_modification) {
guiding_params.device_pointer = render_buffers->buffer.device_pointer;
guiding_params.pass_albedo = pass_denoising_albedo;
guiding_params.pass_normal = pass_denoising_normal;
guiding_params.pass_flow = pass_motion;
guiding_params.stride = buffer_params.stride;
guiding_params.pass_stride = buffer_params.pass_stride;
}
else {
guiding_params.pass_stride = 0;
if (use_pass_albedo) {
guiding_params.pass_albedo = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
if (use_pass_normal) {
guiding_params.pass_normal = guiding_params.pass_stride;
guiding_params.pass_stride += 3;
}
if (use_pass_motion) {
guiding_params.pass_flow = guiding_params.pass_stride;
guiding_params.pass_stride += 2;
}
guiding_params.stride = buffer_params.width;
guiding_buffer.alloc_to_device(buffer_params.width * buffer_params.height *
guiding_params.pass_stride);
guiding_params.device_pointer = guiding_buffer.device_pointer;
}
}
pass_sample_count = buffer_params.get_pass_offset(PASS_SAMPLE_COUNT);
}
const DenoiseParams &denoise_params;
RenderBuffers *render_buffers = nullptr;
const BufferParams &buffer_params;
/* Previous output. */
struct {
device_ptr device_pointer = 0;
int offset = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
} prev_output;
/* Device-side storage of the guiding passes. */
device_only_memory<float> guiding_buffer;
struct {
device_ptr device_pointer = 0;
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_albedo = PASS_UNUSED;
int pass_normal = PASS_UNUSED;
int pass_flow = PASS_UNUSED;
int stride = -1;
int pass_stride = -1;
} guiding_params;
/* Number of input passes. Including the color and extra auxiliary passes. */
int num_input_passes = 0;
bool use_guiding_passes = false;
bool use_pass_albedo = false;
bool use_pass_normal = false;
bool use_pass_motion = false;
int num_samples = 0;
int pass_sample_count = PASS_UNUSED;
/* NOTE: Are only initialized when the corresponding guiding pass is enabled. */
int pass_denoising_albedo = PASS_UNUSED;
int pass_denoising_normal = PASS_UNUSED;
int pass_motion = PASS_UNUSED;
/* For passes which don't need albedo channel for denoising we replace the actual albedo with
* the (0.5, 0.5, 0.5). This flag indicates that the real albedo pass has been replaced with
* the fake values and denoising of passes which do need albedo can no longer happen. */
bool albedo_replaced_with_fake = false;
};
class OptiXDenoiser::DenoisePass {
public:
DenoisePass(const PassType type, const BufferParams &buffer_params) : type(type)
{
noisy_offset = buffer_params.get_pass_offset(type, PassMode::NOISY);
denoised_offset = buffer_params.get_pass_offset(type, PassMode::DENOISED);
const PassInfo pass_info = Pass::get_info(type);
num_components = pass_info.num_components;
use_compositing = pass_info.use_compositing;
use_denoising_albedo = pass_info.use_denoising_albedo;
}
PassType type;
int noisy_offset;
int denoised_offset;
int num_components;
bool use_compositing;
bool use_denoising_albedo;
};
bool OptiXDenoiser::denoise_buffer(const DenoiseTask &task)
{
OptiXDevice *const optix_device = static_cast<OptiXDevice *>(denoiser_device_);
@@ -421,151 +273,6 @@ bool OptiXDenoiser::denoise_filter_guiding_preprocess(const DenoiseContext &cont
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_PREPROCESS, work_size, args);
}
bool OptiXDenoiser::denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.guiding_params.device_pointer,
&context.guiding_params.pass_stride,
&context.guiding_params.pass_albedo,
&buffer_params.width,
&buffer_params.height);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_GUIDING_SET_FAKE_ALBEDO, work_size, args);
}
void OptiXDenoiser::denoise_pass(DenoiseContext &context, PassType pass_type)
{
const BufferParams &buffer_params = context.buffer_params;
const DenoisePass pass(pass_type, buffer_params);
if (pass.noisy_offset == PASS_UNUSED) {
return;
}
if (pass.denoised_offset == PASS_UNUSED) {
LOG(DFATAL) << "Missing denoised pass " << pass_type_as_string(pass_type);
return;
}
if (pass.use_denoising_albedo) {
if (context.albedo_replaced_with_fake) {
LOG(ERROR) << "Pass which requires albedo is denoised after fake albedo has been set.";
return;
}
}
else if (context.use_guiding_passes && !context.albedo_replaced_with_fake) {
context.albedo_replaced_with_fake = true;
if (!denoise_filter_guiding_set_fake_albedo(context)) {
LOG(ERROR) << "Error replacing real albedo with the fake one.";
return;
}
}
/* Read and preprocess noisy color input pass. */
denoise_color_read(context, pass);
if (!denoise_filter_color_preprocess(context, pass)) {
LOG(ERROR) << "Error converting denoising passes to RGB buffer.";
return;
}
if (!denoise_run(context, pass)) {
LOG(ERROR) << "Error running OptiX denoiser.";
return;
}
/* Store result in the combined pass of the render buffer.
*
* This will scale the denoiser result up to match the number of, possibly per-pixel, samples. */
if (!denoise_filter_color_postprocess(context, pass)) {
LOG(ERROR) << "Error copying denoiser result to the denoised pass.";
return;
}
denoiser_queue_->synchronize();
}
void OptiXDenoiser::denoise_color_read(const DenoiseContext &context, const DenoisePass &pass)
{
PassAccessor::PassAccessInfo pass_access_info;
pass_access_info.type = pass.type;
pass_access_info.mode = PassMode::NOISY;
pass_access_info.offset = pass.noisy_offset;
/* Denoiser operates on passes which are used to calculate the approximation, and is never used
* on the approximation. The latter is not even possible because OptiX does not support
* denoising of semi-transparent pixels. */
pass_access_info.use_approximate_shadow_catcher = false;
pass_access_info.use_approximate_shadow_catcher_background = false;
pass_access_info.show_active_pixels = false;
/* TODO(sergey): Consider adding support of actual exposure, to avoid clamping in extreme cases.
*/
const PassAccessorGPU pass_accessor(
denoiser_queue_.get(), pass_access_info, 1.0f, context.num_samples);
PassAccessor::Destination destination(pass_access_info.type);
destination.d_pixels = context.render_buffers->buffer.device_pointer +
pass.denoised_offset * sizeof(float);
destination.num_components = 3;
destination.pixel_stride = context.buffer_params.pass_stride;
BufferParams buffer_params = context.buffer_params;
buffer_params.window_x = 0;
buffer_params.window_y = 0;
buffer_params.window_width = buffer_params.width;
buffer_params.window_height = buffer_params.height;
pass_accessor.get_render_tile_pixels(context.render_buffers, buffer_params, destination);
}
bool OptiXDenoiser::denoise_filter_color_preprocess(const DenoiseContext &context,
const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&pass.denoised_offset);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_PREPROCESS, work_size, args);
}
bool OptiXDenoiser::denoise_filter_color_postprocess(const DenoiseContext &context,
const DenoisePass &pass)
{
const BufferParams &buffer_params = context.buffer_params;
const int work_size = buffer_params.width * buffer_params.height;
DeviceKernelArguments args(&context.render_buffers->buffer.device_pointer,
&buffer_params.full_x,
&buffer_params.full_y,
&buffer_params.width,
&buffer_params.height,
&buffer_params.offset,
&buffer_params.stride,
&buffer_params.pass_stride,
&context.num_samples,
&pass.noisy_offset,
&pass.denoised_offset,
&context.pass_sample_count,
&pass.num_components,
&pass.use_compositing);
return denoiser_queue_->enqueue(DEVICE_KERNEL_FILTER_COLOR_POSTPROCESS, work_size, args);
}
bool OptiXDenoiser::denoise_ensure(DenoiseContext &context)
{
if (!denoise_create_if_needed(context)) {

View File

@@ -21,9 +21,6 @@ class OptiXDenoiser : public DenoiserGPU {
virtual uint get_device_type_mask() const override;
private:
class DenoiseContext;
class DenoisePass;
virtual bool denoise_buffer(const DenoiseTask &task) override;
/* Read guiding passes from the render buffers, preprocess them in a way which is expected by
@@ -36,19 +33,6 @@ class OptiXDenoiser : public DenoiserGPU {
/* Set fake albedo pixels in the albedo guiding pass storage.
* After this point only passes which do not need albedo for denoising can be processed. */
bool denoise_filter_guiding_set_fake_albedo(const DenoiseContext &context);
void denoise_pass(DenoiseContext &context, PassType pass_type);
/* Read input color pass from the render buffer into the memory which corresponds to the noisy
* input within the given context. Pixels are scaled to the number of samples, but are not
* preprocessed yet. */
void denoise_color_read(const DenoiseContext &context, const DenoisePass &pass);
/* Run corresponding filter kernels, preparing data for the denoiser or copying data from the
* denoiser result to the render buffer. */
bool denoise_filter_color_preprocess(const DenoiseContext &context, const DenoisePass &pass);
bool denoise_filter_color_postprocess(const DenoiseContext &context, const DenoisePass &pass);
/* Make sure the OptiX denoiser is created and configured. */
bool denoise_ensure(DenoiseContext &context);
@@ -61,7 +45,7 @@ class OptiXDenoiser : public DenoiserGPU {
bool denoise_configure_if_needed(DenoiseContext &context);
/* Run configured denoiser. */
bool denoise_run(const DenoiseContext &context, const DenoisePass &pass);
bool denoise_run(const DenoiseContext &context, const DenoisePass &pass) override;
OptixDenoiser optix_denoiser_ = nullptr;