Compositor: Add derived resources optimization

This patch introduces a new Derived Resources concept to the compositor.
Derived resources are resources that are computed from a particular
result and cached in it in case they are needed by another operation,
which can greatly improve performance in some cases at the cost of more
memory usage.

The first use case is to store denoised versions of the Denoising Albedo
and Denoising Normals passes if auxiliary pass denoising is enabled in
the denoise node. Consequently, multi-pass denoising setups where the
same auxiliary passes are used in multiple denoise nodes should be much
faster due to caching of the derived resources.

This implementation has the limitation that it can't preemptively
invalidate the cache when the derived resources are no longer needed to
free up memory. This requires a special resource tracking mechanism that
needs to happen during node tree compilation, and will be submitted
later. The limitation is not significant for the particular derived
resource that is currently implemented, since the auxiliary passes are
rarely used outside of denoising.

Fixes #131171.

Pull Request: https://projects.blender.org/blender/blender/pulls/125671
This commit is contained in:
Omar Emara
2025-01-27 14:58:09 +01:00
committed by Omar Emara
parent e05a1871db
commit 2028bc8d7d
10 changed files with 346 additions and 61 deletions

View File

@@ -6,6 +6,7 @@ set(INC
.
algorithms
cached_resources
derived_resources
utilities
../gpu/intern
../makesrna
@@ -21,6 +22,7 @@ set(SRC
COM_compositor.hh
COM_context.hh
COM_conversion_operation.hh
COM_derived_resources.hh
COM_domain.hh
COM_evaluator.hh
COM_input_descriptor.hh
@@ -133,6 +135,10 @@ set(SRC
cached_resources/COM_symmetric_separable_blur_weights.hh
cached_resources/COM_van_vliet_gaussian_coefficients.hh
derived_resources/intern/denoised_auxiliary_pass.cc
derived_resources/COM_denoised_auxiliary_pass.hh
utilities/COM_utilities_diagonals.hh
utilities/COM_utilities_type_conversion.hh
)

View File

@@ -0,0 +1,36 @@
/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#include "COM_denoised_auxiliary_pass.hh"
namespace blender::compositor {
/* -------------------------------------------------------------------------------------------------
* Derived Resources.
*
* Derived resources are resources that are computed from a particular result, stored in it, and
* freed when the result is freed. The same resources might be needed by multiple operations, so
* caching them on the result will improve performance at the cost of higher memory usage.
*
* The DerivedResources class stores instances of the container classes that store derived
* resources. This is very similar in design to the StaticCacheManager, see its description for
* more information. Destroying an instance of this class is expected to destroy all derived
* resources in it.
*
* To add a new derived resource:
*
* - Create a key class that can be used to identify the resource in a Map if needed.
* - Create a resource class to compute and store the resource.
* - Create a container class to store the resources in a map identified by their keys.
* - Add an instance of the container to the DerivedResources class.
*
* See the existing derived resources for reference. */
class DerivedResources {
public:
/* Container of the denoised versions of the result that owns this instance, for when that result
 * is used as an auxiliary denoising pass. When building without WITH_OPENIMAGEDENOISE, the
 * container type is an empty placeholder class. */
DenoisedAuxiliaryPassContainer denoised_auxiliary_passes;
};
} // namespace blender::compositor

View File

@@ -18,6 +18,7 @@
#include "GPU_shader.hh"
#include "GPU_texture.hh"
#include "COM_derived_resources.hh"
#include "COM_domain.hh"
#include "COM_meta_data.hh"
@@ -97,7 +98,11 @@ enum class ResultStorageType : uint8_t {
*
* A result can wrap an external texture that is not allocated nor managed by the result. This is
* set up by a call to the wrap_external method. In that case, when the reference count eventually
* reach zero, the texture will not be freed. */
* reach zero, the texture will not be freed.
*
* A result may store resources that are computed and cached in case they are needed by multiple
* operations. Those are called Derived Resources and can be accessed using the derived_resources
* method. */
class Result {
private:
/* The context that the result was created within, this should be initialized during
@@ -163,6 +168,9 @@ class Result {
* context and should be released back into the pool instead of being freed. For CPU storage,
* this is irrelevant. */
bool is_from_pool_ = false;
/* Stores resources that are derived from this result. Lazily allocated if needed. See the class
* description for more information. */
DerivedResources *derived_resources_ = nullptr;
public:
/* Stores extra information about the result such as image meta data that can eventually be
@@ -332,6 +340,10 @@ class Result {
* operation. */
bool should_compute();
/* Returns a reference to the derived resources of the result, which is allocated if it was not
* allocated already. */
DerivedResources &derived_resources();
/* Returns the type of the result. */
ResultType type() const;

View File

@@ -0,0 +1,89 @@
/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#pragma once
#ifdef WITH_OPENIMAGEDENOISE
# include <cstdint>
# include <memory>
# include <string>
# include "BLI_map.hh"
# include <OpenImageDenoise/oidn.hpp>
namespace blender::compositor {
class Context;
class Result;
/* The type of the auxiliary pass that a denoised buffer is computed from. The type is part of the
 * key that identifies the denoised pass in its container. */
enum class DenoisedAuxiliaryPassType : uint8_t {
Albedo,
Normal,
};
/* ------------------------------------------------------------------------------------------------
* Denoised Auxiliary Pass Key.
*/
/* Identifies a denoised auxiliary pass inside a Map by the type of the pass and the quality that
 * it was denoised with. */
class DenoisedAuxiliaryPassKey {
public:
/* The type of the auxiliary pass that was denoised. */
DenoisedAuxiliaryPassType type;
/* The quality that the auxiliary pass was denoised with. */
oidn::Quality quality;
DenoisedAuxiliaryPassKey(const DenoisedAuxiliaryPassType type, const oidn::Quality quality);
/* Returns a hash computed from both the type and the quality. */
uint64_t hash() const;
};
/* Two keys are equal if both their types and their qualities are equal. */
bool operator==(const DenoisedAuxiliaryPassKey &a, const DenoisedAuxiliaryPassKey &b);
/* -------------------------------------------------------------------------------------------------
 * Denoised Auxiliary Pass.
 *
 * A derived resource that stores a denoised version of the auxiliary pass of the given type using
 * the given quality.
 *
 * The instance owns its denoised buffer and frees it on destruction. Consequently, it is neither
 * copyable nor movable, since an implicit member-wise copy would result in the same buffer being
 * freed twice. Instances are expected to be heap allocated and referenced, as done by the
 * DenoisedAuxiliaryPassContainer. */
class DenoisedAuxiliaryPass {
 public:
  /* The buffer that stores the denoised pass. Owned by this instance and freed in its
   * destructor. */
  float *denoised_buffer = nullptr;

 public:
  /* Computes the denoised version of the given pass, see the implementation for details. */
  DenoisedAuxiliaryPass(Context &context,
                        const Result &pass,
                        const DenoisedAuxiliaryPassType type,
                        const oidn::Quality quality);
  ~DenoisedAuxiliaryPass();

  /* Copying would duplicate the owning pointer, so it is disallowed. Declaring the copy operations
   * also suppresses the implicit move operations. */
  DenoisedAuxiliaryPass(const DenoisedAuxiliaryPass &other) = delete;
  DenoisedAuxiliaryPass &operator=(const DenoisedAuxiliaryPass &other) = delete;
};
/* ------------------------------------------------------------------------------------------------
* Denoised Auxiliary Pass Container.
*/
/* Stores the denoised auxiliary passes that were computed so far, each identified by a key
 * composed of the type of the pass and the quality it was denoised with. */
class DenoisedAuxiliaryPassContainer {
private:
/* The stored denoised passes. The passes are owned by the map through unique pointers. */
Map<DenoisedAuxiliaryPassKey, std::unique_ptr<DenoisedAuxiliaryPass>> map_;
public:
/* Check if there is an available DenoisedAuxiliaryPass derived resource with the given
* parameters in the container, if one exists, return it, otherwise, return a newly created one
* and add it to the container. */
DenoisedAuxiliaryPass &get(Context &context,
const Result &pass,
const DenoisedAuxiliaryPassType type,
const oidn::Quality quality);
};
} // namespace blender::compositor
#else
namespace blender::compositor {
/* Building without OIDN, so define an empty placeholder container such that classes that store a
 * DenoisedAuxiliaryPassContainer member still compile. The placeholder has no members and the
 * user is not expected to use it if OIDN is not available. */
class DenoisedAuxiliaryPassContainer {};
} // namespace blender::compositor
#endif

View File

@@ -0,0 +1,133 @@
/* SPDX-FileCopyrightText: 2025 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#ifdef WITH_OPENIMAGEDENOISE
# include <cstdint>
# include <memory>
# include "BLI_assert.h"
# include "BLI_hash.hh"
# include "MEM_guardedalloc.h"
# include "GPU_texture.hh"
# include "COM_context.hh"
# include "COM_denoised_auxiliary_pass.hh"
# include "COM_result.hh"
# include <OpenImageDenoise/oidn.hpp>
namespace blender::compositor {
/* ------------------------------------------------------------------------------------------------
* Denoised Auxiliary Pass Key.
*/
DenoisedAuxiliaryPassKey::DenoisedAuxiliaryPassKey(const DenoisedAuxiliaryPassType type,
const oidn::Quality quality)
: type(type), quality(quality)
{
}
/* Computes the hash of the key from both of its members, the type and the quality. */
uint64_t DenoisedAuxiliaryPassKey::hash() const
{
  const uint64_t combined_hash = get_default_hash(this->type, this->quality);
  return combined_hash;
}
/* Two keys are equal only if they agree on both the pass type and the quality. */
bool operator==(const DenoisedAuxiliaryPassKey &a, const DenoisedAuxiliaryPassKey &b)
{
  if (a.type != b.type) {
    return false;
  }
  return a.quality == b.quality;
}
/* --------------------------------------------------------------------
* Denoised Auxiliary Pass.
*/
/* Progress monitor callback that lets the filter operations be canceled. The user pointer is
 * expected to point to the Context that the filter is executed for, and its is_canceled method is
 * evaluated to decide if the filter should stop. The API specifies that returning true means the
 * filter should continue and returning false means it should stop, hence the negation. The n
 * argument can be used to track progress, but it is currently unused. See
 * OIDNProgressMonitorFunction in the API for more information. */
static bool oidn_progress_monitor_function(void *user_ptr, double /*n*/)
{
  const Context &context = *static_cast<const Context *>(user_ptr);
  const bool should_continue = !context.is_canceled();
  return should_continue;
}
/* Returns the name of the filter image that corresponds to the given auxiliary pass type. An
 * empty string is returned for unknown types, which is not expected to happen. */
static const char *get_pass_name(const DenoisedAuxiliaryPassType type)
{
  const char *name = nullptr;
  switch (type) {
    case DenoisedAuxiliaryPassType::Albedo:
      name = "albedo";
      break;
    case DenoisedAuxiliaryPassType::Normal:
      name = "normal";
      break;
  }

  if (name == nullptr) {
    BLI_assert_unreachable();
    return "";
  }
  return name;
}
/* Computes the denoised version of the given pass and stores it in the denoised buffer.
 *
 * - context: Used to decide where the pass data is read from and to cancel the filter.
 * - pass: The auxiliary pass to denoise. It is only read.
 * - type: The type of the pass, which decides the name of the filter's input image.
 * - quality: The quality that is set on the filter. */
DenoisedAuxiliaryPass::DenoisedAuxiliaryPass(Context &context,
                                             const Result &pass,
                                             const DenoisedAuxiliaryPassType type,
                                             const oidn::Quality quality)
{
  /* The pass will be denoised in place, so first put the pass data in the denoised buffer, either
   * by duplicating the CPU data or by reading the data back from the GPU texture. */
  if (!context.use_gpu()) {
    this->denoised_buffer = static_cast<float *>(MEM_dupallocN(pass.float_texture()));
  }
  else {
    GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
    this->denoised_buffer = static_cast<float *>(GPU_texture_read(pass, GPU_DATA_FLOAT, 0));
  }

  /* The buffer is addressed with a stride of four floats per pixel, while the images are given to
   * the filter in the Float3 format. */
  const int width = pass.domain().size.x;
  const int height = pass.domain().size.y;
  const int pixel_stride = sizeof(float) * 4;

  oidn::DeviceRef device = oidn::newDevice(oidn::DeviceType::CPU);
  device.commit();

  /* Set the same buffer as both the input image and the output image of the filter, such that the
   * pass is denoised in place. */
  oidn::FilterRef filter = device.newFilter("RT");
  const char *image_names[] = {get_pass_name(type), "output"};
  for (const char *image_name : image_names) {
    filter.setImage(image_name,
                    this->denoised_buffer,
                    oidn::Format::Float3,
                    width,
                    height,
                    0,
                    pixel_stride);
  }

  filter.set("quality", quality);
  filter.setProgressMonitorFunction(oidn_progress_monitor_function, &context);
  filter.commit();
  filter.execute();
}
/* Frees the denoised buffer owned by this instance. The buffer is null if reading or duplicating
 * the pass data returned no buffer during construction, and MEM_freeN is not meant to be called on
 * null pointers, so only free the buffer if it exists. */
DenoisedAuxiliaryPass::~DenoisedAuxiliaryPass()
{
  if (this->denoised_buffer != nullptr) {
    MEM_freeN(this->denoised_buffer);
    this->denoised_buffer = nullptr;
  }
}
/* --------------------------------------------------------------------
* Denoised Auxiliary Pass Container.
*/
/* Returns the denoised pass that is stored in the container for the given type and quality. If
 * none is stored yet, a new one is computed from the given pass, added to the container, and
 * returned. */
DenoisedAuxiliaryPass &DenoisedAuxiliaryPassContainer::get(Context &context,
                                                           const Result &pass,
                                                           const DenoisedAuxiliaryPassType type,
                                                           const oidn::Quality quality)
{
  /* Only invoked by the map if no pass is stored for the key. */
  const auto create_denoised_pass = [&]() {
    return std::make_unique<DenoisedAuxiliaryPass>(context, pass, type, quality);
  };

  const DenoisedAuxiliaryPassKey key(type, quality);
  std::unique_ptr<DenoisedAuxiliaryPass> &denoised_pass = map_.lookup_or_add_cb(
      key, create_denoised_pass);
  return *denoised_pass;
}
} // namespace blender::compositor
#endif

View File

@@ -484,6 +484,9 @@ void Result::free()
integer_texture_ = nullptr;
break;
}
delete derived_resources_;
derived_resources_ = nullptr;
}
bool Result::should_compute()
@@ -491,6 +494,14 @@ bool Result::should_compute()
return initial_reference_count_ != 0;
}
/* Returns the derived resources of the result. The resources are allocated on first access, so
 * results that never have their derived resources accessed do not allocate any. */
DerivedResources &Result::derived_resources()
{
  if (derived_resources_ == nullptr) {
    derived_resources_ = new DerivedResources();
  }

  DerivedResources &resources = *derived_resources_;
  return resources;
}
ResultType Result::type() const
{
return type_;

View File

@@ -14,6 +14,7 @@ set(INC
../compositor
../compositor/algorithms
../compositor/cached_resources
../compositor/derived_resources
../../../intern/opensubdiv
)

View File

@@ -11,6 +11,7 @@ set(INC
../../makesrna
../../compositor/algorithms
../../compositor/cached_resources
../../compositor/derived_resources
../../compositor/utilities
# RNA_prototypes.hh

View File

@@ -18,6 +18,7 @@
#include "DNA_node_types.h"
#include "COM_denoised_auxiliary_pass.hh"
#include "COM_node_operation.hh"
#include "COM_utilities.hh"
@@ -131,14 +132,17 @@ class DenoiseOperation : public NodeOperation {
const int pixel_stride = sizeof(float) * 4;
const eGPUDataFormat data_format = GPU_DATA_FLOAT;
Vector<float *> temporary_buffers_to_free;
float *input_color = nullptr;
float *output_color = nullptr;
if (this->context().use_gpu()) {
/* Download the input texture and set it as both the input and output of the filter to
* denoise it in-place. */
* denoise it in-place. Make sure to track the downloaded buffer to be later freed. */
GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
input_color = static_cast<float *>(GPU_texture_read(input_image, data_format, 0));
output_color = input_color;
temporary_buffers_to_free.append(input_color);
}
else {
input_color = input_image.float_texture();
@@ -152,57 +156,56 @@ class DenoiseOperation : public NodeOperation {
this->set_filter_quality(filter);
filter.setProgressMonitorFunction(oidn_progress_monitor_function, &context());
/* If the albedo input is not a single value input, download the albedo texture, denoise it
* in-place if denoising auxiliary passes is needed, and set it to the main filter. */
float *albedo = nullptr;
Result &input_albedo = get_input("Albedo");
/* If the albedo input is not a single value input, set it to the albedo input of the filter,
* denoising it if needed. */
Result &input_albedo = this->get_input("Albedo");
if (!input_albedo.is_single_value()) {
if (this->context().use_gpu()) {
albedo = static_cast<float *>(GPU_texture_read(input_albedo, data_format, 0));
float *albedo = nullptr;
if (this->should_denoise_auxiliary_passes()) {
albedo = input_albedo.derived_resources()
.denoised_auxiliary_passes
.get(this->context(),
input_albedo,
DenoisedAuxiliaryPassType::Albedo,
this->get_quality())
.denoised_buffer;
}
else {
albedo = input_albedo.float_texture();
}
if (should_denoise_auxiliary_passes()) {
oidn::FilterRef albedoFilter = device.newFilter("RT");
this->set_filter_quality(albedoFilter);
albedoFilter.setImage(
"albedo", albedo, oidn::Format::Float3, width, height, 0, pixel_stride);
albedoFilter.setImage(
"output", albedo, oidn::Format::Float3, width, height, 0, pixel_stride);
albedoFilter.setProgressMonitorFunction(oidn_progress_monitor_function, &context());
albedoFilter.commit();
albedoFilter.execute();
if (this->context().use_gpu()) {
albedo = static_cast<float *>(GPU_texture_read(input_albedo, data_format, 0));
temporary_buffers_to_free.append(albedo);
}
else {
albedo = input_albedo.float_texture();
}
}
filter.setImage("albedo", albedo, oidn::Format::Float3, width, height, 0, pixel_stride);
}
/* If the albedo and normal inputs are not single value inputs, download the normal texture,
* denoise it in-place if denoising auxiliary passes is needed, and set it to the main filter.
* Notice that we also consider the albedo input because OIDN doesn't support denoising with
* only the normal auxiliary pass. */
float *normal = nullptr;
Result &input_normal = get_input("Normal");
if (albedo && !input_normal.is_single_value()) {
if (this->context().use_gpu()) {
normal = static_cast<float *>(GPU_texture_read(input_normal, data_format, 0));
/* If the albedo and normal inputs are not single value inputs, set the normal input to the
* normal input of the filter, denoising it if needed. Notice that we also consider the albedo
* input because OIDN doesn't support denoising with only the normal auxiliary pass. */
Result &input_normal = this->get_input("Normal");
if (!input_albedo.is_single_value() && !input_normal.is_single_value()) {
float *normal = nullptr;
if (should_denoise_auxiliary_passes()) {
normal = input_normal.derived_resources()
.denoised_auxiliary_passes
.get(this->context(),
input_normal,
DenoisedAuxiliaryPassType::Normal,
this->get_quality())
.denoised_buffer;
}
else {
normal = input_normal.float_texture();
}
if (should_denoise_auxiliary_passes()) {
oidn::FilterRef normalFilter = device.newFilter("RT");
this->set_filter_quality(normalFilter);
normalFilter.setImage(
"normal", normal, oidn::Format::Float3, width, height, 0, pixel_stride);
normalFilter.setImage(
"output", normal, oidn::Format::Float3, width, height, 0, pixel_stride);
normalFilter.setProgressMonitorFunction(oidn_progress_monitor_function, &context());
normalFilter.commit();
normalFilter.execute();
if (this->context().use_gpu()) {
normal = static_cast<float *>(GPU_texture_read(input_normal, data_format, 0));
temporary_buffers_to_free.append(normal);
}
else {
normal = input_normal.float_texture();
}
}
filter.setImage("normal", normal, oidn::Format::Float3, width, height, 0, pixel_stride);
@@ -224,16 +227,8 @@ class DenoiseOperation : public NodeOperation {
});
}
/* Buffers for the CPU case are owned by the inputs, while for GPU, they are temporally read
* from the GPU texture, so they need to be freed if they were read. */
if (this->context().use_gpu()) {
MEM_freeN(input_color);
if (albedo) {
MEM_freeN(albedo);
}
if (normal) {
MEM_freeN(normal);
}
for (float *buffer : temporary_buffers_to_free) {
MEM_freeN(buffer);
}
#endif
}
@@ -268,7 +263,7 @@ class DenoiseOperation : public NodeOperation {
#ifdef WITH_OPENIMAGEDENOISE
# if OIDN_VERSION_MAJOR >= 2
OIDNQuality get_quality()
oidn::Quality get_quality()
{
const CMPNodeDenoiseQuality node_quality = static_cast<CMPNodeDenoiseQuality>(
node_storage(bnode()).quality);
@@ -278,26 +273,26 @@ class DenoiseOperation : public NodeOperation {
switch (scene_quality) {
# if OIDN_VERSION >= 20300
case SCE_COMPOSITOR_DENOISE_FAST:
return OIDN_QUALITY_FAST;
return oidn::Quality::Fast;
# endif
case SCE_COMPOSITOR_DENOISE_BALANCED:
return OIDN_QUALITY_BALANCED;
return oidn::Quality::Balanced;
case SCE_COMPOSITOR_DENOISE_HIGH:
default:
return OIDN_QUALITY_HIGH;
return oidn::Quality::High;
}
}
switch (node_quality) {
# if OIDN_VERSION >= 20300
case CMP_NODE_DENOISE_QUALITY_FAST:
return OIDN_QUALITY_FAST;
return oidn::Quality::Fast;
# endif
case CMP_NODE_DENOISE_QUALITY_BALANCED:
return OIDN_QUALITY_BALANCED;
return oidn::Quality::Balanced;
case CMP_NODE_DENOISE_QUALITY_HIGH:
default:
return OIDN_QUALITY_HIGH;
return oidn::Quality::High;
}
}
# endif /* OIDN_VERSION_MAJOR >= 2 */
@@ -305,7 +300,7 @@ class DenoiseOperation : public NodeOperation {
void set_filter_quality([[maybe_unused]] oidn::FilterRef &filter)
{
# if OIDN_VERSION_MAJOR >= 2
OIDNQuality quality = this->get_quality();
oidn::Quality quality = this->get_quality();
filter.set("quality", quality);
# endif
}

View File

@@ -8,6 +8,7 @@ set(INC
intern
../compositor
../compositor/cached_resources
../compositor/derived_resources
../draw/intern
../gpu/intern
../makesrna