Realtime Compositor: Implement Z Combine node

This patch implements the Z Combine node for the realtime compositor.
The patch also extends the SMAA implementation to work with float
textures as a prerequisite to the Z Combine implementation. Moreover, a
mechanism for computing multi-output operations was implemented, in
which unneeded outputs will allocate a dummy 1x1 texture for a correct
shader invocation, then those dummy textures will be cleaned up by
calling a routine right after evaluation.

This is different from the CPU implementation in that the whole combine
mask is anti-aliased, including the alpha mask, which is not considered
in the CPU case.

The node can be implemented as a GPU shader operation when the
anti-aliasing option is disabled, which is something we should do when
the evaluator allows nodes to be executed as both standard and GPU shader
operations.

Pull Request: https://projects.blender.org/blender/blender/pulls/106637
This commit is contained in:
Omar Emara
2023-04-09 09:06:41 +02:00
committed by Omar Emara
parent 7a267aa000
commit b939b60c3f
13 changed files with 380 additions and 20 deletions

View File

@@ -136,6 +136,9 @@ set(GLSL_SRC
shaders/compositor_tone_map_photoreceptor.glsl
shaders/compositor_tone_map_simple.glsl
shaders/compositor_write_output.glsl
shaders/compositor_z_combine_compute_mask.glsl
shaders/compositor_z_combine_from_mask.glsl
shaders/compositor_z_combine_simple.glsl
shaders/library/gpu_shader_compositor_alpha_over.glsl
shaders/library/gpu_shader_compositor_blur_common.glsl
@@ -226,6 +229,7 @@ set(SRC_SHADER_CREATE_INFOS
shaders/infos/compositor_tone_map_photoreceptor_info.hh
shaders/infos/compositor_tone_map_simple_info.hh
shaders/infos/compositor_write_output_info.hh
shaders/infos/compositor_z_combine_info.hh
)
set(SHADER_CREATE_INFOS_CONTENT "")

View File

@@ -170,6 +170,12 @@ class Operation {
* evaluation of the operation to declare that the results are no longer needed by this
* operation. */
void release_inputs();
/* Release the results that were allocated in the execute method but are not actually needed.
* This can be the case if the execute method allocated a dummy texture for an unneeded result,
* see the description of Result::allocate_texture() for more information. This is called after
* the evaluation of the operation. */
void release_unneeded_results();
};
} // namespace blender::realtime_compositor

View File

@@ -112,7 +112,18 @@ class Result {
/* Declare the result to be a texture result, allocate a texture of an appropriate type with
* the size of the given domain from the result's texture pool, and set the domain of the result
* to the given domain. */
* to the given domain.
*
* If the result should not be computed, that is, should_compute() returns false, yet this method
* is called, that means the result is only being allocated because the shader that computes it
* also computes another result that is actually needed, and shaders need to have a texture
* bound to all their image units for a correct invocation, even if some of those textures are
* not needed and will eventually be discarded. In that case, since allocating the full texture
* is not needed, allocate_single_value() is called instead and the reference count is set to 1.
* This essentially allocates a dummy 1x1 texture, which works because out-of-bounds shader writes
* to images are safe. Since this result is not referenced by any other operation, it should be
* manually released after the operation is evaluated, which is implemented by calling the
* Operation::release_unneeded_results() method. */
void allocate_texture(Domain domain);
/* Declare the result to be a single value result, allocate a texture of an appropriate
@@ -228,6 +239,9 @@ class Result {
/* Returns true if the result is a single value and false if it is a texture. */
bool is_single_value() const;
/* Returns true if the result is allocated. */
bool is_allocated() const;
/* Returns the allocated GPU texture of the result. */
GPUTexture *texture() const;

View File

@@ -13,8 +13,8 @@ namespace blender::realtime_compositor {
void smaa(Context &context,
Result &input,
Result &output,
float threshold,
float local_contrast_adaptation_factor,
int corner_rounding);
float threshold = 0.1f,
float local_contrast_adaptation_factor = 2.0f,
int corner_rounding = 25);
} // namespace blender::realtime_compositor

View File

@@ -6,6 +6,7 @@
#include "GPU_texture.h"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_utilities.hh"
#include "COM_algorithm_smaa.hh"
@@ -22,10 +23,25 @@ static Result detect_edges(Context &context,
GPUShader *shader = context.shader_manager().get("compositor_smaa_edge_detection");
GPU_shader_bind(shader);
float luminance_coefficients[3];
IMB_colormanagement_get_luminance_coefficients(luminance_coefficients);
switch (input.type()) {
case ResultType::Color: {
float luminance_coefficients[3];
IMB_colormanagement_get_luminance_coefficients(luminance_coefficients);
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
break;
}
case ResultType::Vector: {
float luminance_coefficients[3] = {1.0f, 1.0f, 1.0f};
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
break;
}
case ResultType::Float: {
float luminance_coefficients[3] = {1.0f, 0.0f, 0.0f};
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
break;
}
}
GPU_shader_uniform_3fv(shader, "luminance_coefficients", luminance_coefficients);
GPU_shader_uniform_1f(shader, "smaa_threshold", threshold);
GPU_shader_uniform_1f(
shader, "smaa_local_contrast_adaptation_factor", local_contrast_adaptation_factor);
@@ -78,7 +94,9 @@ static Result calculate_blending_weights(Context &context, Result &edges, int co
static void blend_neighborhood(Context &context, Result &input, Result &weights, Result &output)
{
GPUShader *shader = context.shader_manager().get("compositor_smaa_neighborhood_blending");
GPUShader *shader = context.shader_manager().get(
input.type() == ResultType::Float ? "compositor_smaa_neighborhood_blending_float" :
"compositor_smaa_neighborhood_blending_color");
GPU_shader_bind(shader);
GPU_texture_filter_mode(input.texture(), true);

View File

@@ -34,6 +34,8 @@ void Operation::evaluate()
execute();
release_inputs();
release_unneeded_results();
}
Result &Operation::get_result(StringRef identifier)
@@ -201,4 +203,13 @@ void Operation::release_inputs()
}
}
void Operation::release_unneeded_results()
{
for (Result &result : results_.values()) {
if (!result.should_compute() && result.is_allocated()) {
result.release();
}
}
}
} // namespace blender::realtime_compositor

View File

@@ -21,12 +21,21 @@ Result::Result(ResultType type, TexturePool &texture_pool)
Result Result::Temporary(ResultType type, TexturePool &texture_pool)
{
Result result = Result(type, texture_pool);
result.increment_reference_count();
result.set_initial_reference_count(1);
result.reset();
return result;
}
void Result::allocate_texture(Domain domain)
{
/* The result is not actually needed, so allocate a dummy single value texture instead. See the
* method description for more information. */
if (!should_compute()) {
allocate_single_value();
increment_reference_count();
return;
}
is_single_value_ = false;
switch (type_) {
case ResultType::Float:
@@ -247,6 +256,11 @@ bool Result::is_single_value() const
return is_single_value_;
}
/* Reports whether the result currently holds a texture, that is, whether its texture pointer is
 * not null. */
bool Result::is_allocated() const
{
  const bool has_texture = (texture_ != nullptr);
  return has_texture;
}
GPUTexture *Result::texture() const
{
return texture_;

View File

@@ -0,0 +1,18 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* Writes the Z Combine mix factor of each pixel to mask_img. Only the mask is produced here so
 * that it can later be anti-aliased and used for mixing. */
void main()
{
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  vec4 first_color = texture_load(first_tx, texel);
  float first_depth = texture_load(first_z_tx, texel).x;
  float second_depth = texture_load(second_z_tx, texel).x;

  /* This mirrors the logic of compositor_z_combine_simple.glsl, see that file for the full
   * explanation. The factor is 1 where the first image has the smaller Z value and 0 otherwise,
   * scaled by the alpha of the first image when use_alpha is enabled. */
  float is_first_closer = (first_depth < second_depth) ? 1.0 : 0.0;
  float first_opacity = use_alpha ? first_color.a : 1.0;

  imageStore(mask_img, texel, vec4(is_first_closer * first_opacity));
}

View File

@@ -0,0 +1,21 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* Mixes the two color images and the two Z images using the value read from mask_tx as the mix
 * factor, where a mask value of 1 selects the first image and 0 selects the second image. */
void main()
{
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  vec4 first_color = texture_load(first_tx, texel);
  vec4 second_color = texture_load(second_tx, texel);
  float first_depth = texture_load(first_z_tx, texel).x;
  float second_depth = texture_load(second_z_tx, texel).x;
  float blend_factor = texture_load(mask_tx, texel).x;

  vec4 result_color = mix(second_color, first_color, blend_factor);
  if (use_alpha) {
    /* Keep the more opaque alpha of the two images. */
    result_color.a = max(second_color.a, first_color.a);
  }
  float result_depth = mix(second_depth, first_depth, blend_factor);

  imageStore(combined_img, texel, result_color);
  imageStore(combined_z_img, texel, vec4(result_depth));
}

View File

@@ -0,0 +1,29 @@
#pragma BLENDER_REQUIRE(gpu_shader_compositor_texture_utilities.glsl)
/* Combines the two color images and the two Z images in a single pass, writing the results to
 * combined_img and combined_z_img. */
void main()
{
  ivec2 texel = ivec2(gl_GlobalInvocationID.xy);

  vec4 first_color = texture_load(first_tx, texel);
  vec4 second_color = texture_load(second_tx, texel);
  float first_depth = texture_load(first_z_tx, texel).x;
  float second_depth = texture_load(second_z_tx, texel).x;

  /* The image whose object is closer to the camera wins. The closeness term is 1 when the first
   * image has the smaller Z value, which selects the first image, and 0 otherwise, which selects
   * the second image. When Use Alpha is enabled, the alpha of the first image additionally scales
   * the factor, so a closer but non-opaque first image is only partially mixed in. */
  float is_first_closer = (first_depth < second_depth) ? 1.0 : 0.0;
  float first_opacity = use_alpha ? first_color.a : 1.0;
  float blend_factor = is_first_closer * first_opacity;

  vec4 result_color = mix(second_color, first_color, blend_factor);
  if (use_alpha) {
    /* Keep the more opaque alpha of the two images. */
    result_color.a = max(second_color.a, first_color.a);
  }
  float result_depth = mix(second_depth, first_depth, blend_factor);

  imageStore(combined_img, texel, result_color);
  imageStore(combined_z_img, texel, vec4(result_depth));
}

View File

@@ -32,13 +32,21 @@ GPU_SHADER_CREATE_INFO(compositor_smaa_blending_weight_calculation)
.compute_source("compositor_smaa_blending_weight_calculation.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending)
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending_shared)
.local_group_size(16, 16)
.define("SMAA_GLSL_3")
.define("SMAA_RT_METRICS",
"vec4(1.0 / vec2(textureSize(input_tx, 0)), vec2(textureSize(input_tx, 0)))")
.sampler(0, ImageType::FLOAT_2D, "input_tx")
.sampler(1, ImageType::FLOAT_2D, "weights_tx")
.compute_source("compositor_smaa_neighborhood_blending.glsl");
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending_color)
.additional_info("compositor_smaa_neighborhood_blending_shared")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.compute_source("compositor_smaa_neighborhood_blending.glsl")
.do_static_compilation(true);
GPU_SHADER_CREATE_INFO(compositor_smaa_neighborhood_blending_float)
.additional_info("compositor_smaa_neighborhood_blending_shared")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "output_img")
.do_static_compilation(true);

View File

@@ -0,0 +1,38 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
#include "gpu_shader_create_info.hh"
/* Create info of the single-pass Z Combine compute shader. It samples both color images and both
 * Z images and writes the combined color (RGBA16F) and combined Z (R16F) images. The use_alpha
 * push constant is read by compositor_z_combine_simple.glsl. */
GPU_SHADER_CREATE_INFO(compositor_z_combine_simple)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "use_alpha")
.sampler(0, ImageType::FLOAT_2D, "first_tx")
.sampler(1, ImageType::FLOAT_2D, "first_z_tx")
.sampler(2, ImageType::FLOAT_2D, "second_tx")
.sampler(3, ImageType::FLOAT_2D, "second_z_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_z_img")
.compute_source("compositor_z_combine_simple.glsl")
.do_static_compilation(true);
/* Create info of the compute shader that only produces the Z Combine mask. It samples the first
 * color image and both Z images, the second color image is not bound, and writes a single
 * channel (R16F) mask image. */
GPU_SHADER_CREATE_INFO(compositor_z_combine_compute_mask)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "use_alpha")
.sampler(0, ImageType::FLOAT_2D, "first_tx")
.sampler(1, ImageType::FLOAT_2D, "first_z_tx")
.sampler(2, ImageType::FLOAT_2D, "second_z_tx")
.image(0, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "mask_img")
.compute_source("compositor_z_combine_compute_mask.glsl")
.do_static_compilation(true);
/* Create info of the compute shader that combines the inputs using a precomputed mask. It has
 * the same samplers and output images as compositor_z_combine_simple, plus the mask_tx sampler
 * that supplies the mix factor. */
GPU_SHADER_CREATE_INFO(compositor_z_combine_from_mask)
.local_group_size(16, 16)
.push_constant(Type::BOOL, "use_alpha")
.sampler(0, ImageType::FLOAT_2D, "first_tx")
.sampler(1, ImageType::FLOAT_2D, "first_z_tx")
.sampler(2, ImageType::FLOAT_2D, "second_tx")
.sampler(3, ImageType::FLOAT_2D, "second_z_tx")
.sampler(4, ImageType::FLOAT_2D, "mask_tx")
.image(0, GPU_RGBA16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_img")
.image(1, GPU_R16F, Qualifier::WRITE, ImageType::FLOAT_2D, "combined_z_img")
.compute_source("compositor_z_combine_from_mask.glsl")
.do_static_compilation(true);

View File

@@ -5,13 +5,20 @@
* \ingroup cmpnodes
*/
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "BLI_math_vector_types.hh"
#include "BLT_translation.h"
#include "UI_interface.h"
#include "UI_resources.h"
#include "COM_algorithm_smaa.hh"
#include "COM_node_operation.hh"
#include "GPU_shader.h"
#include "node_composite_util.hh"
/* **************** Z COMBINE ******************** */
@@ -20,10 +27,22 @@ namespace blender::nodes::node_composite_zcombine_cc {
static void cmp_node_zcombine_declare(NodeDeclarationBuilder &b)
{
b.add_input<decl::Color>(N_("Image")).default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Float>(N_("Z")).default_value(1.0f).min(0.0f).max(10000.0f);
b.add_input<decl::Color>(N_("Image"), "Image_001").default_value({1.0f, 1.0f, 1.0f, 1.0f});
b.add_input<decl::Float>(N_("Z"), "Z_001").default_value(1.0f).min(0.0f).max(10000.0f);
b.add_input<decl::Color>(N_("Image"))
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_domain_priority(0);
b.add_input<decl::Float>(N_("Z"))
.default_value(1.0f)
.min(0.0f)
.max(10000.0f)
.compositor_domain_priority(2);
b.add_input<decl::Color>(N_("Image"), "Image_001")
.default_value({1.0f, 1.0f, 1.0f, 1.0f})
.compositor_domain_priority(1);
b.add_input<decl::Float>(N_("Z"), "Z_001")
.default_value(1.0f)
.min(0.0f)
.max(10000.0f)
.compositor_domain_priority(3);
b.add_output<decl::Color>(N_("Image"));
b.add_output<decl::Float>(N_("Z"));
}
@@ -45,9 +64,171 @@ class ZCombineOperation : public NodeOperation {
void execute() override
{
get_input("Image").pass_through(get_result("Image"));
get_result("Z").allocate_invalid();
context().set_info_message("Viewport compositor setup not fully supported");
if (compute_domain().size == int2(1)) {
execute_single_value();
}
else if (use_anti_aliasing()) {
execute_anti_aliased();
}
else {
execute_simple();
}
}
void execute_single_value()
{
const float4 first_color = get_input("Image").get_color_value();
const float4 second_color = get_input("Image_001").get_color_value();
const float first_z_value = get_input("Z").get_float_value();
const float second_z_value = get_input("Z_001").get_float_value();
/* Mix between the first and second images using a mask such that the image with the object
* closer to the camera is returned. The mask value is then 1, and thus returns the first image
* if its Z value is less than that of the second image. Otherwise, its value is 0, and thus
* returns the second image. Furthermore, if the object in the first image is closer but has a
* non-opaque alpha, then the alpha is used as a mask, but only if Use Alpha is enabled. */
const float z_combine_factor = float(first_z_value < second_z_value);
const float alpha_factor = use_alpha() ? first_color.w : 1.0f;
const float mix_factor = z_combine_factor * alpha_factor;
Result &combined = get_result("Image");
if (combined.should_compute()) {
float4 combined_color = math::interpolate(second_color, first_color, mix_factor);
/* Use the more opaque alpha from the two images. */
combined_color.w = use_alpha() ? math::max(second_color.w, first_color.w) : combined_color.w;
combined.allocate_single_value();
combined.set_color_value(combined_color);
}
Result &combined_z = get_result("Z");
if (combined_z.should_compute()) {
const float combined_z_value = math::interpolate(second_z_value, first_z_value, mix_factor);
combined_z.allocate_single_value();
combined_z.set_float_value(combined_z_value);
}
}
/* Combines the inputs in a single dispatch of the compositor_z_combine_simple shader, without
 * any anti-aliasing of the combine mask. */
void execute_simple()
{
GPUShader *shader = shader_manager().get("compositor_z_combine_simple");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "use_alpha", use_alpha());
/* Bind the four inputs as textures. */
const Result &first = get_input("Image");
first.bind_as_texture(shader, "first_tx");
const Result &first_z = get_input("Z");
first_z.bind_as_texture(shader, "first_z_tx");
const Result &second = get_input("Image_001");
second.bind_as_texture(shader, "second_tx");
const Result &second_z = get_input("Z_001");
second_z.bind_as_texture(shader, "second_z_tx");
/* Both outputs are allocated and bound unconditionally because the shader writes to both. See
 * the description of Result::allocate_texture() for how an unneeded output is handled. */
Result &combined = get_result("Image");
const Domain domain = compute_domain();
combined.allocate_texture(domain);
combined.bind_as_image(shader, "combined_img");
Result &combined_z = get_result("Z");
combined_z.allocate_texture(domain);
combined_z.bind_as_image(shader, "combined_z_img");
compute_dispatch_threads_at_least(shader, domain.size);
/* Unbind everything that was bound above. */
first.unbind_as_texture();
first_z.unbind_as_texture();
second.unbind_as_texture();
second_z.unbind_as_texture();
combined.unbind_as_image();
combined_z.unbind_as_image();
GPU_shader_unbind();
}
/* Combines the inputs in two steps: first the anti-aliased combine mask is computed by
 * compute_mask(), then the compositor_z_combine_from_mask shader mixes the inputs using that
 * mask. */
void execute_anti_aliased()
{
Result mask = compute_mask();
GPUShader *shader = shader_manager().get("compositor_z_combine_from_mask");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "use_alpha", use_alpha());
/* Bind the four inputs and the mask as textures. */
const Result &first = get_input("Image");
first.bind_as_texture(shader, "first_tx");
const Result &first_z = get_input("Z");
first_z.bind_as_texture(shader, "first_z_tx");
const Result &second = get_input("Image_001");
second.bind_as_texture(shader, "second_tx");
const Result &second_z = get_input("Z_001");
second_z.bind_as_texture(shader, "second_z_tx");
mask.bind_as_texture(shader, "mask_tx");
/* Both outputs are allocated and bound unconditionally because the shader writes to both. See
 * the description of Result::allocate_texture() for how an unneeded output is handled. */
Result &combined = get_result("Image");
const Domain domain = compute_domain();
combined.allocate_texture(domain);
combined.bind_as_image(shader, "combined_img");
Result &combined_z = get_result("Z");
combined_z.allocate_texture(domain);
combined_z.bind_as_image(shader, "combined_z_img");
compute_dispatch_threads_at_least(shader, domain.size);
/* Unbind everything that was bound above. */
first.unbind_as_texture();
first_z.unbind_as_texture();
second.unbind_as_texture();
second_z.unbind_as_texture();
mask.unbind_as_texture();
combined.unbind_as_image();
combined_z.unbind_as_image();
GPU_shader_unbind();
/* The mask is a temporary result returned by compute_mask(), so release it once it is no longer
 * needed. */
mask.release();
}
/* Computes the combine mask using the compositor_z_combine_compute_mask shader, anti-aliases it
 * using SMAA, and returns the anti-aliased mask. The returned result is a temporary result that
 * the caller is expected to release. */
Result compute_mask()
{
GPUShader *shader = shader_manager().get("compositor_z_combine_compute_mask");
GPU_shader_bind(shader);
GPU_shader_uniform_1b(shader, "use_alpha", use_alpha());
/* The second color image is not an input of the mask shader, so only three inputs are bound. */
const Result &first = get_input("Image");
first.bind_as_texture(shader, "first_tx");
const Result &first_z = get_input("Z");
first_z.bind_as_texture(shader, "first_z_tx");
const Result &second_z = get_input("Z_001");
second_z.bind_as_texture(shader, "second_z_tx");
const Domain domain = compute_domain();
/* The aliased mask is a temporary float result that only lives within this method. */
Result mask = Result::Temporary(ResultType::Float, texture_pool());
mask.allocate_texture(domain);
mask.bind_as_image(shader, "mask_img");
compute_dispatch_threads_at_least(shader, domain.size);
first.unbind_as_texture();
first_z.unbind_as_texture();
second_z.unbind_as_texture();
mask.unbind_as_image();
GPU_shader_unbind();
/* Anti-alias the mask. No SMAA parameters are passed, so the default arguments declared for
 * smaa() are used. */
Result anti_aliased_mask = Result::Temporary(ResultType::Float, texture_pool());
smaa(context(), mask, anti_aliased_mask);
/* The aliased mask is no longer needed after anti-aliasing. */
mask.release();
return anti_aliased_mask;
}
/* Returns true if the custom1 member of the node is non-zero, which is what enables the alpha
 * handling in the combine logic. */
bool use_alpha()
{
  const bool is_alpha_enabled = (bnode().custom1 != 0);
  return is_alpha_enabled;
}
/* Returns true if the custom2 member of the node is zero. Note that the flag is inverted with
 * respect to use_alpha(): a zero value means anti-aliasing is enabled. */
bool use_anti_aliasing()
{
  const bool is_anti_aliasing_enabled = (bnode().custom2 == 0);
  return is_anti_aliasing_enabled;
}
};
@@ -68,8 +249,6 @@ void register_node_type_cmp_zcombine()
ntype.declare = file_ns::cmp_node_zcombine_declare;
ntype.draw_buttons = file_ns::node_composit_buts_zcombine;
ntype.get_compositor_operation = file_ns::get_compositor_operation;
ntype.realtime_compositor_unsupported_message = N_(
"Node not supported in the Viewport compositor");
nodeRegisterType(&ntype);
}