Compositor: Implement CPU domain realization

This patch implements the domain realization algorithm for the new CPU
compositor. Only nearest interpolation with no wrapping is implemented
at the moment.

A new sampling method was added to the result class and some relevant
methods were moved into inline functions.
This commit is contained in:
Omar Emara
2024-10-11 12:12:24 +03:00
parent 7f48c931a4
commit 317cf37680
3 changed files with 179 additions and 103 deletions

View File

@@ -4,7 +4,10 @@
#pragma once
#include "BLI_assert.h"
#include "BLI_math_interp.hh"
#include "BLI_math_matrix_types.hh"
#include "BLI_math_vector.h"
#include "BLI_math_vector_types.hh"
#include "GPU_shader.hh"
@@ -363,7 +366,7 @@ class Result {
const Domain &domain() const;
/* Returns a pointer to the allocated float data. */
float *float_texture();
float *float_texture() const;
/* Loads the float pixel at the given texel coordinates and returns it in a float4. If the number
* of channels in the result are less than 4, then the rest of the returned float4 will have its
@@ -377,6 +380,12 @@ class Result {
* float4 will be ignored. This is similar to how the imageStore function in GLSL works. */
void store_pixel(const int2 &texel, const float4 &pixel_value);
/* Equivalent to the GLSL texture() function with nearest interpolation and zero boundary
 * conditions. The coordinates are thus expected to have half-pixel offsets. A float4 is always
 * returned regardless of the number of channels of the buffer, the remaining channels will be
 * initialized with the template float4(0, 0, 0, 1). */
float4 sample_nearest_zero(const float2 coordinates) const;
private:
/* Allocates the texture data for the given size, either on the GPU or CPU based on the result's
* context. See the allocate_texture method for information about the from_pool argument. */
@@ -392,4 +401,99 @@ class Result {
void copy_pixel(float *target, const float *source) const;
};
/* -------------------------------------------------------------------- */
/* Inline Methods.
*/
inline float4 Result::sample_nearest_zero(const float2 coordinates) const
{
  /* Channels beyond the result's channel count keep the template value (0, 0, 0, 1). */
  float4 sampled_value = float4(0.0f, 0.0f, 0.0f, 1.0f);

  /* Single values are constant over the whole domain, so the coordinates are irrelevant. */
  if (is_single_value_) {
    this->copy_pixel(sampled_value, float_texture_);
    return sampled_value;
  }

  /* Map the normalized [0, 1] coordinates into texel space before interpolating. */
  const int2 image_size = domain_.size;
  const float2 texel_coordinates = float2(image_size) * coordinates;

  math::interpolate_nearest_border_fl(this->float_texture(),
                                      sampled_value,
                                      image_size.x,
                                      image_size.y,
                                      this->channels_count(),
                                      texel_coordinates.x,
                                      texel_coordinates.y);

  return sampled_value;
}
/* Returns the domain of the result. */
inline const Domain &Result::domain() const
{
  return domain_;
}
/* Returns a pointer to the CPU-side float data. Only valid for results stored as floats on the
 * CPU, which is asserted. */
inline float *Result::float_texture() const
{
  BLI_assert(storage_type_ == ResultStorageType::FloatCPU);
  return float_texture_;
}
/* Loads the pixel at the given texel coordinates. Channels beyond the result's channel count
 * keep the template value (0, 0, 0, 1). Single values ignore the texel coordinates. */
inline float4 Result::load_pixel(const int2 &texel) const
{
  float4 loaded_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
  const float *source_pixel = is_single_value_ ? float_texture_ : this->get_float_pixel(texel);
  this->copy_pixel(loaded_value, source_pixel);
  return loaded_value;
}
/* Stores the given pixel value at the given texel coordinates, writing only as many channels as
 * the result has. */
inline void Result::store_pixel(const int2 &texel, const float4 &pixel_value)
{
  this->copy_pixel(this->get_float_pixel(texel), pixel_value);
}
/* Returns the number of float channels a pixel of this result occupies. */
inline int64_t Result::channels_count() const
{
  switch (type_) {
    case ResultType::Vector:
    case ResultType::Color:
      return 4;
    case ResultType::Float3:
      return 3;
    case ResultType::Float2:
    case ResultType::Int2:
      return 2;
    case ResultType::Float:
      return 1;
  }

  /* Unreachable fallback to silence compiler warnings. */
  return 4;
}
/* Returns a pointer to the start of the pixel at the given texel coordinates, assuming row-major
 * storage of tightly packed pixels of channels_count() floats each. */
inline float *Result::get_float_pixel(const int2 &texel) const
{
  return float_texture_ + (texel.y * domain_.size.x + texel.x) * this->channels_count();
}
inline void Result::copy_pixel(float *target, const float *source) const
{
switch (type_) {
case ResultType::Float:
*target = *source;
break;
case ResultType::Float2:
case ResultType::Int2:
copy_v2_v2(target, source);
break;
case ResultType::Float3:
copy_v3_v3(target, source);
break;
case ResultType::Vector:
case ResultType::Color:
copy_v4_v4(target, source);
break;
}
}
} // namespace blender::realtime_compositor

View File

@@ -61,42 +61,16 @@ static const char *get_realization_shader(Result &input,
return nullptr;
}
void realize_on_domain(Context &context,
Result &input,
Result &output,
const Domain &domain,
const float3x3 &input_transformation,
const RealizationOptions &realization_options)
static void realize_on_domain_gpu(Context &context,
Result &input,
Result &output,
const Domain &domain,
const float3x3 &inverse_transformation,
const RealizationOptions &realization_options)
{
const Domain input_domain = Domain(input.domain().size, input_transformation);
if (input_domain == domain) {
input.pass_through(output);
output.set_transformation(domain.transformation);
return;
}
GPUShader *shader = context.get_shader(get_realization_shader(input, realization_options));
GPU_shader_bind(shader);
/* Translation from lower-left corner to center of input space. */
float2 input_translate(-float2(input_domain.size) / 2.0f);
/* Bias translations in case of nearest interpolation to avoid the round-to-even behavior of
 * some GPUs at pixel boundaries. */
if (realization_options.interpolation == Interpolation::Nearest) {
input_translate += std::numeric_limits<float>::epsilon() * 10e3f;
}
/* Transformation from input domain with 0,0 in lower-left to virtual compositing space. */
const float3x3 in_transformation = math::translate(input_transformation, input_translate);
/* Transformation from output domain with 0,0 in lower-left to virtual compositing space. */
const float3x3 out_transformation = math::translate(domain.transformation,
-float2(domain.size) / 2.0f);
/* Concatenate to get full transform from output space to input space */
const float3x3 inverse_transformation = math::invert(in_transformation) * out_transformation;
GPU_shader_uniform_mat3_as_mat4(shader, "inverse_transformation", inverse_transformation.ptr());
/* The texture sampler should use bilinear interpolation for both the bilinear and bicubic
@@ -127,4 +101,72 @@ void realize_on_domain(Context &context,
GPU_shader_unbind();
}
/* Realizes the input on the given domain on the CPU by evaluating the input sampler at each
 * output pixel transformed by the given inverse transformation. Only nearest interpolation with
 * zero boundary conditions is supported for now. */
static void realize_on_domain_cpu(Result &input,
                                  Result &output,
                                  const Domain &domain,
                                  const float3x3 &inverse_transformation)
{
  output.allocate_texture(domain);

  /* The input size is invariant across pixels, so compute it once outside of the parallel loop
   * instead of querying the input domain for every pixel. */
  const float2 input_size = float2(input.domain().size);

  parallel_for(domain.size, [&](const int2 texel) {
    /* Add 0.5 to evaluate the input sampler at the center of the pixel. */
    float2 coordinates = float2(texel) + float2(0.5f);

    /* Transform the input image by transforming the domain coordinates with the inverse of input
     * image's transformation. The inverse transformation is an affine matrix and thus the
     * coordinates should be in homogeneous coordinates. */
    coordinates = (inverse_transformation * float3(coordinates, 1.0f)).xy();

    /* Divide by the input image size to get the coordinates into the sampler's expected [0, 1]
     * range. */
    const float2 normalized_coordinates = coordinates / input_size;

    /* TODO: Support other interpolations and wrapping modes. */
    output.store_pixel(texel, input.sample_nearest_zero(normalized_coordinates));
  });
}
/* Realizes the input on the given domain, writing the result into the output. If the input
 * domain already matches the target domain, the input is passed through unchanged. Otherwise,
 * the full output-space to input-space transformation is computed and the realization is
 * dispatched to the GPU or CPU implementation based on the context. */
void realize_on_domain(Context &context,
                       Result &input,
                       Result &output,
                       const Domain &domain,
                       const float3x3 &input_transformation,
                       const RealizationOptions &realization_options)
{
  const Domain input_domain = Domain(input.domain().size, input_transformation);
  /* The input already has the target domain, so no realization is needed. */
  if (input_domain == domain) {
    input.pass_through(output);
    output.set_transformation(domain.transformation);
    return;
  }
  /* Translation from lower-left corner to center of input space. */
  float2 input_translate(-float2(input_domain.size) / 2.0f);
  /* Bias translations in case of nearest interpolation to avoid the round-to-even behavior of
   * some GPUs at pixel boundaries. */
  if (realization_options.interpolation == Interpolation::Nearest) {
    input_translate += std::numeric_limits<float>::epsilon() * 10e3f;
  }
  /* Transformation from input domain with 0,0 in lower-left to virtual compositing space. */
  const float3x3 in_transformation = math::translate(input_transformation, input_translate);
  /* Transformation from output domain with 0,0 in lower-left to virtual compositing space. */
  const float3x3 out_transformation = math::translate(domain.transformation,
                                                      -float2(domain.size) / 2.0f);
  /* Concatenate to get full transform from output space to input space */
  const float3x3 inverse_transformation = math::invert(in_transformation) * out_transformation;
  if (context.use_gpu()) {
    realize_on_domain_gpu(
        context, input, output, domain, inverse_transformation, realization_options);
  }
  else {
    realize_on_domain_cpu(input, output, domain, inverse_transformation);
  }
}
} // namespace blender::realtime_compositor

View File

@@ -709,34 +709,6 @@ int Result::reference_count() const
return reference_count_;
}
/* Returns the domain of the result. */
const Domain &Result::domain() const
{
  return domain_;
}
/* Returns a pointer to the CPU-side float data. Only valid for results stored as floats on the
 * CPU, which is asserted. */
float *Result::float_texture()
{
  BLI_assert(storage_type_ == ResultStorageType::FloatCPU);
  return float_texture_;
}
/* Loads the pixel at the given texel coordinates. Channels beyond the result's channel count
 * keep the template value (0, 0, 0, 1). Single values ignore the texel coordinates. */
float4 Result::load_pixel(const int2 &texel) const
{
  float4 pixel_value = float4(0.0f, 0.0f, 0.0f, 1.0f);
  if (is_single_value_) {
    this->copy_pixel(pixel_value, float_texture_);
  }
  else {
    this->copy_pixel(pixel_value, this->get_float_pixel(texel));
  }
  return pixel_value;
}
/* Stores the given pixel value at the given texel coordinates, writing only as many channels as
 * the result has. */
void Result::store_pixel(const int2 &texel, const float4 &pixel_value)
{
  this->copy_pixel(this->get_float_pixel(texel), pixel_value);
}
void Result::allocate_data(int2 size, bool from_pool)
{
if (context_->use_gpu()) {
@@ -774,46 +746,4 @@ void Result::allocate_data(int2 size, bool from_pool)
}
}
/* Returns the number of float channels a pixel of this result occupies. */
int64_t Result::channels_count() const
{
  switch (type_) {
    case ResultType::Float:
      return 1;
    case ResultType::Float2:
    case ResultType::Int2:
      return 2;
    case ResultType::Float3:
      return 3;
    case ResultType::Vector:
    case ResultType::Color:
      return 4;
  }
  /* Unreachable fallback to silence compiler warnings. */
  return 4;
}
/* Returns a pointer to the start of the pixel at the given texel coordinates, assuming row-major
 * storage of tightly packed pixels of channels_count() floats each. */
float *Result::get_float_pixel(const int2 &texel) const
{
  return float_texture_ + (texel.y * domain_.size.x + texel.x) * this->channels_count();
}
/* Copies a single pixel from source to target, copying only as many float channels as the
 * result's type requires. */
void Result::copy_pixel(float *target, const float *source) const
{
  switch (type_) {
    case ResultType::Float:
      *target = *source;
      break;
    case ResultType::Float2:
    case ResultType::Int2:
      copy_v2_v2(target, source);
      break;
    case ResultType::Float3:
      copy_v3_v3(target, source);
      break;
    case ResultType::Vector:
    case ResultType::Color:
      copy_v4_v4(target, source);
      break;
  }
}
} // namespace blender::realtime_compositor