Compositor: Add Composite node for new CPU compositor

This patch implements the Composite node for the new CPU compositor.
This is essentially equivalent to the Viewer node commit.
This commit is contained in:
Omar Emara
2024-08-22 14:48:52 +03:00
parent b0390cae09
commit 997ab86906
5 changed files with 173 additions and 84 deletions

View File

@@ -81,9 +81,8 @@ class Context {
* since the region can be zero sized. */
virtual rcti get_compositing_region() const = 0;
/* Get the texture where the result of the compositor should be written. This should be called by
* the composite output node to get its target texture. */
virtual GPUTexture *get_output_texture() = 0;
/* Get the result where the result of the compositor should be written. */
virtual Result get_output_result() = 0;
/* Get the result where the result of the compositor viewer should be written, given the domain
* of the result to be viewed and whether the output is a non-color data image. */

View File

@@ -133,9 +133,12 @@ class Context : public realtime_compositor::Context {
return visible_camera_region;
}
GPUTexture *get_output_texture() override
realtime_compositor::Result get_output_result() override
{
return DRW_viewport_texture_list_get()->color;
realtime_compositor::Result result = this->create_result(
realtime_compositor::ResultType::Color, realtime_compositor::ResultPrecision::Half);
result.wrap_external(DRW_viewport_texture_list_get()->color);
return result;
}
realtime_compositor::Result get_viewer_output_result(realtime_compositor::Domain /*domain*/,

View File

@@ -6,6 +6,7 @@
* \ingroup cmpnodes
*/
#include "BLI_bounds_types.hh"
#include "BLI_math_vector_types.hh"
#include "UI_interface.hh"
@@ -77,103 +78,185 @@ class CompositeOperation : public NodeOperation {
color.w = alpha.get_float_value();
}
GPU_texture_clear(context().get_output_texture(), GPU_DATA_FLOAT, color);
const Domain domain = compute_domain();
Result output = context().get_output_result();
if (this->context().use_gpu()) {
GPU_texture_clear(output, GPU_DATA_FLOAT, color);
}
else {
parallel_for(domain.size, [&](const int2 texel) { output.store_pixel(texel, color); });
}
}
/* Executes when the alpha channel of the image is ignored. */
void execute_ignore_alpha()
{
GPUShader *shader = context().get_shader("compositor_write_output_opaque",
ResultPrecision::Half);
if (context().use_gpu()) {
this->execute_ignore_alpha_gpu();
}
else {
this->execute_ignore_alpha_cpu();
}
}
void execute_ignore_alpha_gpu()
{
const Result &image = get_input("Image");
const Domain domain = compute_domain();
Result output = context().get_output_result();
GPUShader *shader = context().get_shader("compositor_write_output_opaque", output.precision());
GPU_shader_bind(shader);
/* The compositing space might be limited to a subset of the output texture, so only write into
* that compositing region. */
const rcti compositing_region = context().get_compositing_region();
const int2 lower_bound = int2(compositing_region.xmin, compositing_region.ymin);
const int2 upper_bound = int2(compositing_region.xmax, compositing_region.ymax);
GPU_shader_uniform_2iv(shader, "lower_bound", lower_bound);
GPU_shader_uniform_2iv(shader, "upper_bound", upper_bound);
const Bounds<int2> bounds = get_output_bounds();
GPU_shader_uniform_2iv(shader, "lower_bound", bounds.min);
GPU_shader_uniform_2iv(shader, "upper_bound", bounds.max);
const Result &image = get_input("Image");
image.bind_as_texture(shader, "input_tx");
GPUTexture *output_texture = context().get_output_texture();
const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
GPU_texture_image_bind(output_texture, image_unit);
output.bind_as_image(shader, "output_img");
const int2 compositing_region_size = context().get_compositing_region_size();
compute_dispatch_threads_at_least(shader, compositing_region_size);
compute_dispatch_threads_at_least(shader, domain.size);
image.unbind_as_texture();
GPU_texture_image_unbind(output_texture);
output.unbind_as_image();
GPU_shader_unbind();
}
void execute_ignore_alpha_cpu()
{
const Domain domain = compute_domain();
const Result &image = get_input("Image");
Result output = context().get_output_result();
const Bounds<int2> bounds = get_output_bounds();
parallel_for(domain.size, [&](const int2 texel) {
const int2 output_texel = texel + bounds.min;
if (output_texel.x > bounds.max.x || output_texel.y > bounds.max.y) {
return;
}
output.store_pixel(texel + bounds.min, float4(image.load_pixel(texel).xyz(), 1.0f));
});
}
/* Executes when the image texture is written with no adjustments and can thus be copied directly
* to the output texture. */
* to the output. */
void execute_copy()
{
GPUShader *shader = context().get_shader("compositor_write_output", ResultPrecision::Half);
if (context().use_gpu()) {
this->execute_copy_gpu();
}
else {
this->execute_copy_cpu();
}
}
void execute_copy_gpu()
{
const Result &image = get_input("Image");
const Domain domain = compute_domain();
Result output = context().get_output_result();
GPUShader *shader = context().get_shader("compositor_write_output", output.precision());
GPU_shader_bind(shader);
/* The compositing space might be limited to a subset of the output texture, so only write into
* that compositing region. */
const rcti compositing_region = context().get_compositing_region();
const int2 lower_bound = int2(compositing_region.xmin, compositing_region.ymin);
const int2 upper_bound = int2(compositing_region.xmax, compositing_region.ymax);
GPU_shader_uniform_2iv(shader, "lower_bound", lower_bound);
GPU_shader_uniform_2iv(shader, "upper_bound", upper_bound);
const Bounds<int2> bounds = get_output_bounds();
GPU_shader_uniform_2iv(shader, "lower_bound", bounds.min);
GPU_shader_uniform_2iv(shader, "upper_bound", bounds.max);
const Result &image = get_input("Image");
image.bind_as_texture(shader, "input_tx");
GPUTexture *output_texture = context().get_output_texture();
const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
GPU_texture_image_bind(output_texture, image_unit);
output.bind_as_image(shader, "output_img");
const int2 compositing_region_size = context().get_compositing_region_size();
compute_dispatch_threads_at_least(shader, compositing_region_size);
compute_dispatch_threads_at_least(shader, domain.size);
image.unbind_as_texture();
GPU_texture_image_unbind(output_texture);
output.unbind_as_image();
GPU_shader_unbind();
}
void execute_copy_cpu()
{
const Domain domain = compute_domain();
const Result &image = get_input("Image");
Result output = context().get_output_result();
const Bounds<int2> bounds = get_output_bounds();
parallel_for(domain.size, [&](const int2 texel) {
const int2 output_texel = texel + bounds.min;
if (output_texel.x > bounds.max.x || output_texel.y > bounds.max.y) {
return;
}
output.store_pixel(texel + bounds.min, image.load_pixel(texel));
});
}
/* Executes when the alpha channel of the image is set as the value of the input alpha. */
void execute_set_alpha()
{
GPUShader *shader = context().get_shader("compositor_write_output_alpha",
ResultPrecision::Half);
if (context().use_gpu()) {
execute_set_alpha_gpu();
}
else {
execute_set_alpha_cpu();
}
}
void execute_set_alpha_gpu()
{
const Result &image = get_input("Image");
const Domain domain = compute_domain();
Result output = context().get_output_result();
GPUShader *shader = context().get_shader("compositor_write_output_alpha", output.precision());
GPU_shader_bind(shader);
/* The compositing space might be limited to a subset of the output texture, so only write into
* that compositing region. */
const rcti compositing_region = context().get_compositing_region();
const int2 lower_bound = int2(compositing_region.xmin, compositing_region.ymin);
const int2 upper_bound = int2(compositing_region.xmax, compositing_region.ymax);
GPU_shader_uniform_2iv(shader, "lower_bound", lower_bound);
GPU_shader_uniform_2iv(shader, "upper_bound", upper_bound);
const Bounds<int2> bounds = get_output_bounds();
GPU_shader_uniform_2iv(shader, "lower_bound", bounds.min);
GPU_shader_uniform_2iv(shader, "upper_bound", bounds.max);
const Result &image = get_input("Image");
image.bind_as_texture(shader, "input_tx");
const Result &alpha = get_input("Alpha");
alpha.bind_as_texture(shader, "alpha_tx");
GPUTexture *output_texture = context().get_output_texture();
const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
GPU_texture_image_bind(output_texture, image_unit);
output.bind_as_image(shader, "output_img");
const int2 compositing_region_size = context().get_compositing_region_size();
compute_dispatch_threads_at_least(shader, compositing_region_size);
compute_dispatch_threads_at_least(shader, domain.size);
image.unbind_as_texture();
alpha.unbind_as_texture();
GPU_texture_image_unbind(output_texture);
output.unbind_as_image();
GPU_shader_unbind();
}
void execute_set_alpha_cpu()
{
const Domain domain = compute_domain();
const Result &image = get_input("Image");
const Result &alpha = get_input("Alpha");
Result output = context().get_output_result();
const Bounds<int2> bounds = get_output_bounds();
parallel_for(domain.size, [&](const int2 texel) {
const int2 output_texel = texel + bounds.min;
if (output_texel.x > bounds.max.x || output_texel.y > bounds.max.y) {
return;
}
output.store_pixel(texel + bounds.min,
float4(image.load_pixel(texel).xyz(), alpha.load_pixel(texel).x));
});
}
/* Returns the bounds of the compositing region. Writes should be restricted to this
 * region, since it might cover only a subset of the output result. */
Bounds<int2> get_output_bounds()
{
const rcti compositing_region = context().get_compositing_region();
return Bounds<int2>(int2(compositing_region.xmin, compositing_region.ymin),
int2(compositing_region.xmax, compositing_region.ymax));
}
/* If true, the alpha channel of the image is set to 1, that is, it becomes opaque. If false, the
* alpha channel of the image is retained, but only if the alpha input is not linked. If the
* alpha input is linked, the value of that input will be used as the alpha of the image. */

View File

@@ -64,7 +64,6 @@ class ViewerOperation : public NodeOperation {
const Result &image = get_input("Image");
const Result &alpha = get_input("Alpha");
if (image.is_single_value() && alpha.is_single_value()) {
execute_clear();
}

View File

@@ -154,10 +154,10 @@ class Context : public realtime_compositor::Context {
/* Input data. */
ContextInputData input_data_;
/* Output combined texture. */
GPUTexture *output_texture_ = nullptr;
/* Output combined result. */
realtime_compositor::Result output_result_;
/* Viewer output texture. */
/* Viewer output result. */
realtime_compositor::Result viewer_output_result_;
/* Cached textures that the compositor took ownership of. */
@@ -167,13 +167,14 @@ class Context : public realtime_compositor::Context {
Context(const ContextInputData &input_data, TexturePool &texture_pool)
: realtime_compositor::Context(texture_pool),
input_data_(input_data),
output_result_(this->create_result(realtime_compositor::ResultType::Color)),
viewer_output_result_(this->create_result(realtime_compositor::ResultType::Color))
{
}
virtual ~Context()
{
GPU_TEXTURE_FREE_SAFE(output_texture_);
output_result_.release();
viewer_output_result_.release();
for (GPUTexture *texture : textures_) {
GPU_texture_free(texture);
@@ -235,24 +236,24 @@ class Context : public realtime_compositor::Context {
return render_region;
}
GPUTexture *get_output_texture() override
realtime_compositor::Result get_output_result() override
{
/* TODO: just a temporary hack, needs to get stored in RenderResult,
* once that supports GPU buffers. */
if (output_texture_ == nullptr) {
const int2 size = get_render_size();
output_texture_ = GPU_texture_create_2d(
"compositor_output_texture",
size.x,
size.y,
1,
get_precision() == realtime_compositor::ResultPrecision::Half ? GPU_RGBA16F :
GPU_RGBA32F,
GPU_TEXTURE_USAGE_GENERAL,
nullptr);
const int2 render_size = get_render_size();
if (output_result_.is_allocated()) {
/* If the allocated result has the same size as the render size, return it as is. */
if (render_size == output_result_.domain().size) {
return output_result_;
}
else {
/* Otherwise, the size changed, so release its data and reset it, then reallocate it
 * below with the new render size. */
output_result_.release();
output_result_.reset();
}
}
return output_texture_;
output_result_.allocate_texture(render_size, false);
return output_result_;
}
realtime_compositor::Result get_viewer_output_result(realtime_compositor::Domain domain,
@@ -446,7 +447,7 @@ class Context : public realtime_compositor::Context {
void output_to_render_result()
{
if (!output_texture_) {
if (!output_result_.is_allocated()) {
return;
}
@@ -455,18 +456,22 @@ class Context : public realtime_compositor::Context {
if (rr) {
RenderView *rv = RE_RenderViewGetByName(rr, input_data_.view_name.c_str());
ImBuf *ibuf = RE_RenderViewEnsureImBuf(rr, rv);
rr->have_combined = true;
GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
float *output_buffer = (float *)GPU_texture_read(output_texture_, GPU_DATA_FLOAT, 0);
if (output_buffer) {
ImBuf *ibuf = RE_RenderViewEnsureImBuf(rr, rv);
if (this->use_gpu()) {
GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
float *output_buffer = static_cast<float *>(
GPU_texture_read(output_result_, GPU_DATA_FLOAT, 0));
IMB_assign_float_buffer(ibuf, output_buffer, IB_TAKE_OWNERSHIP);
}
/* TODO: z-buffer output. */
rr->have_combined = true;
else {
float *data = static_cast<float *>(
MEM_malloc_arrayN(rr->rectx * rr->recty, 4 * sizeof(float), __func__));
IMB_assign_float_buffer(ibuf, data, IB_TAKE_OWNERSHIP);
std::memcpy(
data, output_result_.float_texture(), rr->rectx * rr->recty * 4 * sizeof(float));
}
}
if (re) {