Compositor: Implement CPU domain realization
This patch implements the domain realization algorithm for the new CPU compositor. Only nearest interpolation with no wrapping is implemented at the moment. A new sampling method was added to the result class and some relevant methods were moved into inline functions.
This commit is contained in:
@@ -4,7 +4,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_assert.h"
|
||||
#include "BLI_math_interp.hh"
|
||||
#include "BLI_math_matrix_types.hh"
|
||||
#include "BLI_math_vector.h"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
|
||||
#include "GPU_shader.hh"
|
||||
@@ -363,7 +366,7 @@ class Result {
|
||||
const Domain &domain() const;
|
||||
|
||||
/* Returns a pointer to the allocated float data. */
|
||||
float *float_texture();
|
||||
float *float_texture() const;
|
||||
|
||||
/* Loads the float pixel at the given texel coordinates and returns it in a float4. If the number
|
||||
* of channels in the result are less than 4, then the rest of the returned float4 will have its
|
||||
@@ -377,6 +380,12 @@ class Result {
|
||||
* float4 will be ignored. This is similar to how the imageStore function in GLSL works. */
|
||||
void store_pixel(const int2 &texel, const float4 &pixel_value);
|
||||
|
||||
/* Equivalent to the GLSL texture() function with nearest interpolation and zero boundary
|
||||
* conditions. The coordinates are thus expected to have half-pixel offsets. A float4 is always
|
||||
* returned regardless of the number of channels of the buffer, the remaining channels will be
|
||||
* initialized with the template float4(0, 0, 0, 1). */
|
||||
float4 sample_nearest_zero(const float2 coordinates) const;
|
||||
|
||||
private:
|
||||
/* Allocates the texture data for the given size, either on the GPU or CPU based on the result's
|
||||
* context. See the allocate_texture method for information about the from_pool argument. */
|
||||
@@ -392,4 +401,99 @@ class Result {
|
||||
void copy_pixel(float *target, const float *source) const;
|
||||
};
|
||||
|
||||
/* -------------------------------------------------------------------- */
|
||||
/* Inline Methods.
|
||||
*/
|
||||
|
||||
inline float4 Result::sample_nearest_zero(const float2 coordinates) const
{
  /* Channels that the buffer does not provide keep the values of this (0, 0, 0, 1) template. */
  float4 sampled = float4(0.0f, 0.0f, 0.0f, 1.0f);

  if (!is_single_value_) {
    /* Scale the given coordinates by the domain size to get the texel space coordinates that are
     * passed to the nearest interpolation function. */
    const int2 extent = domain_.size;
    const float2 scaled = coordinates * float2(extent);
    math::interpolate_nearest_border_fl(this->float_texture(),
                                        sampled,
                                        extent.x,
                                        extent.y,
                                        this->channels_count(),
                                        scaled.x,
                                        scaled.y);
  }
  else {
    /* Single value results are read directly from the start of the buffer, the coordinates are
     * not used. */
    this->copy_pixel(sampled, float_texture_);
  }

  return sampled;
}
|
||||
|
||||
inline const Domain &Result::domain() const
{
  /* Read-only access to the domain stored in this result. */
  return this->domain_;
}
|
||||
|
||||
inline float *Result::float_texture() const
|
||||
{
|
||||
BLI_assert(storage_type_ == ResultStorageType::FloatCPU);
|
||||
return float_texture_;
|
||||
}
|
||||
|
||||
inline float4 Result::load_pixel(const int2 &texel) const
{
  /* Single value results are always read from the start of the buffer, otherwise, the pixel at
   * the given texel is read. */
  const float *source = is_single_value_ ? float_texture_ : this->get_float_pixel(texel);

  /* Channels that the result does not have keep the values of this (0, 0, 0, 1) template. */
  float4 loaded = float4(0.0f, 0.0f, 0.0f, 1.0f);
  this->copy_pixel(loaded, source);
  return loaded;
}
|
||||
|
||||
inline void Result::store_pixel(const int2 &texel, const float4 &pixel_value)
|
||||
{
|
||||
this->copy_pixel(this->get_float_pixel(texel), pixel_value);
|
||||
}
|
||||
|
||||
inline int64_t Result::channels_count() const
|
||||
{
|
||||
switch (type_) {
|
||||
case ResultType::Float:
|
||||
return 1;
|
||||
case ResultType::Float2:
|
||||
case ResultType::Int2:
|
||||
return 2;
|
||||
case ResultType::Float3:
|
||||
return 3;
|
||||
case ResultType::Vector:
|
||||
case ResultType::Color:
|
||||
return 4;
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
|
||||
inline float *Result::get_float_pixel(const int2 &texel) const
|
||||
{
|
||||
return float_texture_ + (texel.y * domain_.size.x + texel.x) * this->channels_count();
|
||||
}
|
||||
|
||||
inline void Result::copy_pixel(float *target, const float *source) const
|
||||
{
|
||||
switch (type_) {
|
||||
case ResultType::Float:
|
||||
*target = *source;
|
||||
break;
|
||||
case ResultType::Float2:
|
||||
case ResultType::Int2:
|
||||
copy_v2_v2(target, source);
|
||||
break;
|
||||
case ResultType::Float3:
|
||||
copy_v3_v3(target, source);
|
||||
break;
|
||||
case ResultType::Vector:
|
||||
case ResultType::Color:
|
||||
copy_v4_v4(target, source);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
||||
|
||||
@@ -61,42 +61,16 @@ static const char *get_realization_shader(Result &input,
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
void realize_on_domain(Context &context,
|
||||
Result &input,
|
||||
Result &output,
|
||||
const Domain &domain,
|
||||
const float3x3 &input_transformation,
|
||||
const RealizationOptions &realization_options)
|
||||
static void realize_on_domain_gpu(Context &context,
|
||||
Result &input,
|
||||
Result &output,
|
||||
const Domain &domain,
|
||||
const float3x3 &inverse_transformation,
|
||||
const RealizationOptions &realization_options)
|
||||
{
|
||||
const Domain input_domain = Domain(input.domain().size, input_transformation);
|
||||
if (input_domain == domain) {
|
||||
input.pass_through(output);
|
||||
output.set_transformation(domain.transformation);
|
||||
return;
|
||||
}
|
||||
|
||||
GPUShader *shader = context.get_shader(get_realization_shader(input, realization_options));
|
||||
GPU_shader_bind(shader);
|
||||
|
||||
/* Translation from lower-left corner to center of input space. */
|
||||
float2 input_translate(-float2(input_domain.size) / 2.0f);
|
||||
|
||||
/* Bias translations in case of nearest interpolation to avoid the round-to-even behavior of
|
||||
* some GPUs at pixel boundaries. */
|
||||
if (realization_options.interpolation == Interpolation::Nearest) {
|
||||
input_translate += std::numeric_limits<float>::epsilon() * 10e3f;
|
||||
}
|
||||
|
||||
/* Transformation from input domain with 0,0 in lower-left to virtual compositing space. */
|
||||
const float3x3 in_transformation = math::translate(input_transformation, input_translate);
|
||||
|
||||
/* Transformation from output domain with 0,0 in lower-left to virtual compositing space. */
|
||||
const float3x3 out_transformation = math::translate(domain.transformation,
|
||||
-float2(domain.size) / 2.0f);
|
||||
|
||||
/* Concatenate to get full transform from output space to input space */
|
||||
const float3x3 inverse_transformation = math::invert(in_transformation) * out_transformation;
|
||||
|
||||
GPU_shader_uniform_mat3_as_mat4(shader, "inverse_transformation", inverse_transformation.ptr());
|
||||
|
||||
/* The texture sampler should use bilinear interpolation for both the bilinear and bicubic
|
||||
@@ -127,4 +101,72 @@ void realize_on_domain(Context &context,
|
||||
GPU_shader_unbind();
|
||||
}
|
||||
|
||||
/* Realizes the input on the given domain on the CPU. The output is allocated with the given
 * domain, then every one of its pixels is computed by transforming the pixel center using the
 * given inverse transformation and sampling the input at the transformed location using nearest
 * interpolation with zero boundary conditions. */
static void realize_on_domain_cpu(Result &input,
                                  Result &output,
                                  const Domain &domain,
                                  const float3x3 &inverse_transformation)
{
  output.allocate_texture(domain);

  /* The input size is the same for every pixel, so compute it once outside of the loop. */
  const float2 input_size = float2(input.domain().size);

  parallel_for(domain.size, [&](const int2 texel) {
    /* Add 0.5 to evaluate the input sampler at the center of the pixel. */
    const float2 output_coordinates = float2(texel) + float2(0.5f);

    /* Transform the input image by transforming the domain coordinates with the inverse of input
     * image's transformation. The inverse transformation is an affine matrix and thus the
     * coordinates should be in homogeneous coordinates. */
    const float2 input_coordinates =
        (inverse_transformation * float3(output_coordinates, 1.0f)).xy();

    /* Divide by the input image size to get the relevant coordinates into the sampler's expected
     * [0, 1] range. */
    const float2 normalized_coordinates = input_coordinates / input_size;

    /* TODO: Support other interpolations and wrapping modes. */
    output.store_pixel(texel, input.sample_nearest_zero(normalized_coordinates));
  });
}
|
||||
|
||||
void realize_on_domain(Context &context,
                       Result &input,
                       Result &output,
                       const Domain &domain,
                       const float3x3 &input_transformation,
                       const RealizationOptions &realization_options)
{
  /* If the domain of the input with the given transformation is equal to the target domain, pass
   * the input through to the output and only set the transformation. */
  const Domain input_domain = Domain(input.domain().size, input_transformation);
  if (input_domain == domain) {
    input.pass_through(output);
    output.set_transformation(domain.transformation);
    return;
  }

  /* Translation from lower-left corner to center of input space. */
  float2 input_offset(-float2(input_domain.size) / 2.0f);

  /* Bias the translation in case of nearest interpolation to avoid the round-to-even behavior of
   * some GPUs at pixel boundaries. */
  const bool use_nearest = realization_options.interpolation == Interpolation::Nearest;
  if (use_nearest) {
    input_offset += std::numeric_limits<float>::epsilon() * 10e3f;
  }

  /* Transformation from input domain with 0,0 in lower-left to virtual compositing space. */
  const float3x3 input_to_virtual = math::translate(input_transformation, input_offset);

  /* Transformation from output domain with 0,0 in lower-left to virtual compositing space. */
  const float2 output_offset = -float2(domain.size) / 2.0f;
  const float3x3 output_to_virtual = math::translate(domain.transformation, output_offset);

  /* Concatenate to get the full transformation from output space to input space. */
  const float3x3 inverse_transformation = math::invert(input_to_virtual) * output_to_virtual;

  /* Dispatch to the implementation that matches the device of the context. */
  if (!context.use_gpu()) {
    realize_on_domain_cpu(input, output, domain, inverse_transformation);
    return;
  }

  realize_on_domain_gpu(
      context, input, output, domain, inverse_transformation, realization_options);
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
||||
|
||||
@@ -709,34 +709,6 @@ int Result::reference_count() const
|
||||
return reference_count_;
|
||||
}
|
||||
|
||||
const Domain &Result::domain() const
{
  /* Read-only access to the domain stored in this result. */
  return this->domain_;
}
|
||||
|
||||
float *Result::float_texture()
|
||||
{
|
||||
BLI_assert(storage_type_ == ResultStorageType::FloatCPU);
|
||||
return float_texture_;
|
||||
}
|
||||
|
||||
float4 Result::load_pixel(const int2 &texel) const
{
  /* Single value results are always read from the start of the buffer, otherwise, the pixel at
   * the given texel is read. */
  const float *source = is_single_value_ ? float_texture_ : this->get_float_pixel(texel);

  /* Channels that the result does not have keep the values of this (0, 0, 0, 1) template. */
  float4 loaded = float4(0.0f, 0.0f, 0.0f, 1.0f);
  this->copy_pixel(loaded, source);
  return loaded;
}
|
||||
|
||||
void Result::store_pixel(const int2 &texel, const float4 &pixel_value)
|
||||
{
|
||||
this->copy_pixel(this->get_float_pixel(texel), pixel_value);
|
||||
}
|
||||
|
||||
void Result::allocate_data(int2 size, bool from_pool)
|
||||
{
|
||||
if (context_->use_gpu()) {
|
||||
@@ -774,46 +746,4 @@ void Result::allocate_data(int2 size, bool from_pool)
|
||||
}
|
||||
}
|
||||
|
||||
int64_t Result::channels_count() const
|
||||
{
|
||||
switch (type_) {
|
||||
case ResultType::Float:
|
||||
return 1;
|
||||
case ResultType::Float2:
|
||||
case ResultType::Int2:
|
||||
return 2;
|
||||
case ResultType::Float3:
|
||||
return 3;
|
||||
case ResultType::Vector:
|
||||
case ResultType::Color:
|
||||
return 4;
|
||||
}
|
||||
return 4;
|
||||
}
|
||||
|
||||
float *Result::get_float_pixel(const int2 &texel) const
|
||||
{
|
||||
return float_texture_ + (texel.y * domain_.size.x + texel.x) * this->channels_count();
|
||||
}
|
||||
|
||||
void Result::copy_pixel(float *target, const float *source) const
|
||||
{
|
||||
switch (type_) {
|
||||
case ResultType::Float:
|
||||
*target = *source;
|
||||
break;
|
||||
case ResultType::Float2:
|
||||
case ResultType::Int2:
|
||||
copy_v2_v2(target, source);
|
||||
break;
|
||||
case ResultType::Float3:
|
||||
copy_v3_v3(target, source);
|
||||
break;
|
||||
case ResultType::Vector:
|
||||
case ResultType::Color:
|
||||
copy_v4_v4(target, source);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
||||
|
||||
Reference in New Issue
Block a user