Files
test/source/blender/compositor/algorithms/intern/parallel_reduction.cc
Omar Emara 89e0472e49 Compositor: Use gpu::TexturePool instead of DRW pool
This patch removes the compositor texture pool implementation which
relies on the DRW texture pool, and replaces it with the new texture
pool implementation from the GPU module.

Since the GPU module texture pool does not rely on the global DST, we
can use it for both the viewport compositor engine and the GPU
compositor, so the virtual texture pool implementation is removed and
the GPU texture pool is used directly.

The viewport compositor engine does not need to reset the pool because
that is done by the draw manager. But the GPU compositor needs to reset
the pool every evaluation. The pool is deleted directly after rendering
using the render pipeline or through RE_FreeUnusedGPUResources for the
interactive compositor.

Pull Request: https://projects.blender.org/blender/blender/pulls/134437
2025-02-12 15:59:45 +01:00

765 lines
26 KiB
C++

/* SPDX-FileCopyrightText: 2023 Blender Authors
*
* SPDX-License-Identifier: GPL-2.0-or-later */
#include <cmath>
#include <limits>
#include "BLI_index_range.hh"
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "BLI_math_vector_types.hh"
#include "BLI_task.hh"
#include "MEM_guardedalloc.h"
#include "GPU_compute.hh"
#include "GPU_shader.hh"
#include "GPU_state.hh"
#include "GPU_texture.hh"
#include "GPU_texture_pool.hh"
#include "COM_context.hh"
#include "COM_result.hh"
#include "COM_algorithm_parallel_reduction.hh"
namespace blender::compositor {
/* Reduces the given texture into a single value and returns it. The return value should be freed
 * by a call to MEM_freeN. The return value is either a pointer to a float, or a pointer to an
 * array of floats that represents a vector. This depends on the given format, which should be
 * compatible with the reduction shader.
 *
 * The given reduction shader should be bound when calling the function and the shader is expected
 * to be derived from the compositor_parallel_reduction.glsl shader, see that file for more
 * information. Also see the compositor_parallel_reduction_info.hh file for example shader
 * definitions.
 *
 * At least one reduction pass is always dispatched, even if the given texture is already a single
 * pixel. Otherwise, the source pixel would be read back as is, in the format of the source
 * texture and without the shader ever running on it, so whatever the shader does during its
 * initial reduction would be skipped for such inputs. As a consequence, the returned pixel is
 * always read from a pooled texture of the given format, never from the source texture. */
static float *parallel_reduction_dispatch(GPUTexture *texture,
                                          GPUShader *shader,
                                          eGPUTextureFormat format)
{
  GPU_shader_uniform_1b(shader, "is_initial_reduction", true);

  /* The binding points only depend on the shader, so query them once instead of per pass. */
  const int input_unit = GPU_shader_get_sampler_binding(shader, "input_tx");
  const int output_unit = GPU_shader_get_sampler_binding(shader, "output_img");

  GPUTexture *texture_to_reduce = texture;
  int2 size_to_reduce = int2(GPU_texture_width(texture), GPU_texture_height(texture));

  /* Dispatch the reduction shader until the texture reduces to a single pixel. Each pass divides
   * both dimensions by 16, rounding up. A do-while loop is used such that a single pixel source
   * texture still goes through one pass. */
  do {
    const int2 reduced_size = math::divide_ceil(size_to_reduce, int2(16));
    GPUTexture *reduced_texture = gpu::TexturePool::get().acquire_texture(
        reduced_size.x, reduced_size.y, format, GPU_TEXTURE_USAGE_GENERAL);

    /* Issued before sampling the input, which for all but the first pass is the texture that was
     * written by the previous dispatch. */
    GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH);
    GPU_texture_bind(texture_to_reduce, input_unit);
    GPU_texture_image_bind(reduced_texture, output_unit);

    GPU_compute_dispatch(shader, reduced_size.x, reduced_size.y, 1);

    GPU_texture_image_unbind(reduced_texture);
    GPU_texture_unbind(texture_to_reduce);

    /* Release the input texture only if it is not the source texture, since the source texture is
     * not acquired or owned by the function. */
    if (texture_to_reduce != texture) {
      gpu::TexturePool::get().release_texture(texture_to_reduce);
    }

    texture_to_reduce = reduced_texture;
    size_to_reduce = reduced_size;

    /* All passes after the first one operate on already reduced data. */
    GPU_shader_uniform_1b(shader, "is_initial_reduction", false);
  } while (size_to_reduce != int2(1));

  GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
  float *pixel = static_cast<float *>(GPU_texture_read(texture_to_reduce, GPU_DATA_FLOAT, 0));

  /* Since at least one pass was dispatched, the final texture is always one that was acquired
   * from the pool above, so it is always released here. */
  gpu::TexturePool::get().release_texture(texture_to_reduce);

  return pixel;
}
/* Reduces the given function in parallel over the given 2D range, the reduction function should
* have the given identity value. The given function gets as arguments the texel coordinates of the
* element of the range as well as a reference to the value where the result should be accumulated,
* while the reduction function gets a reference to two values and returns their reduction. */
template<typename Value, typename Function, typename Reduction>
static Value parallel_reduce(const int2 range,
const Value &identity,
const Function &function,
const Reduction &reduction)
{
return threading::parallel_reduce(
IndexRange(range.y),
64,
identity,
[&](const IndexRange sub_y_range, const Value &initial_value) {
Value result = initial_value;
for (const int64_t y : sub_y_range) {
for (const int64_t x : IndexRange(range.x)) {
function(int2(x, y), result);
}
}
return result;
},
reduction);
}
/* --------------------------------------------------------------------
* Sum Reductions.
*/
/* GPU path of sum_red(). Binds the full precision "compositor_sum_red" shader, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision float pixel, and
 * returns the value of that pixel. The pixel buffer handed back by the dispatch is freed here and
 * the shader is unbound before returning. */
static float sum_red_gpu(Context &context, const Result &result)
{
  GPUShader *reduction_shader = context.get_shader("compositor_sum_red", ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float total = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return total;
}
static float sum_red_cpu(const Result &result)
{
return float(parallel_reduce(
result.domain().size,
0.0,
[&](const int2 texel, double &accumulated_value) {
accumulated_value += result.load_pixel<float4>(texel).x;
},
[&](const double &a, const double &b) { return a + b; }));
}
/* Returns the value computed by sum_red_gpu() if the context uses the GPU, and the value computed
 * by sum_red_cpu() otherwise. */
float sum_red(Context &context, const Result &result)
{
  return context.use_gpu() ? sum_red_gpu(context, result) : sum_red_cpu(result);
}
/* GPU path of sum_green(). Binds the full precision "compositor_sum_green" shader, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision float pixel, and
 * returns the value of that pixel. The pixel buffer handed back by the dispatch is freed here and
 * the shader is unbound before returning. */
static float sum_green_gpu(Context &context, const Result &result)
{
  GPUShader *reduction_shader = context.get_shader("compositor_sum_green", ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float total = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return total;
}
static float sum_green_cpu(const Result &result)
{
return float(parallel_reduce(
result.domain().size,
0.0,
[&](const int2 texel, double &accumulated_value) {
accumulated_value += result.load_pixel<float4>(texel).y;
},
[&](const double &a, const double &b) { return a + b; }));
}
/* Returns the value computed by sum_green_gpu() if the context uses the GPU, and the value
 * computed by sum_green_cpu() otherwise. */
float sum_green(Context &context, const Result &result)
{
  return context.use_gpu() ? sum_green_gpu(context, result) : sum_green_cpu(result);
}
/* GPU path of sum_blue(). Binds the full precision "compositor_sum_blue" shader, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision float pixel, and
 * returns the value of that pixel. The pixel buffer handed back by the dispatch is freed here and
 * the shader is unbound before returning. */
static float sum_blue_gpu(Context &context, const Result &result)
{
  GPUShader *reduction_shader = context.get_shader("compositor_sum_blue", ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float total = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return total;
}
static float sum_blue_cpu(const Result &result)
{
return float(parallel_reduce(
result.domain().size,
0.0,
[&](const int2 texel, double &accumulated_value) {
accumulated_value += result.load_pixel<float4>(texel).z;
},
[&](const double &a, const double &b) { return a + b; }));
}
/* Returns the value computed by sum_blue_gpu() if the context uses the GPU, and the value
 * computed by sum_blue_cpu() otherwise. */
float sum_blue(Context &context, const Result &result)
{
  return context.use_gpu() ? sum_blue_gpu(context, result) : sum_blue_cpu(result);
}
/* GPU path of sum_luminance(). Binds the full precision "compositor_sum_luminance" shader,
 * uploads the given luminance coefficients to its "luminance_coefficients" uniform, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision float pixel, and
 * returns the value of that pixel. The pixel buffer handed back by the dispatch is freed here and
 * the shader is unbound before returning. */
static float sum_luminance_gpu(Context &context,
                               const Result &result,
                               const float3 &luminance_coefficients)
{
  GPUShader *reduction_shader = context.get_shader("compositor_sum_luminance",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_3fv(reduction_shader, "luminance_coefficients", luminance_coefficients);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float total = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return total;
}
static float sum_luminance_cpu(const Result &result, const float3 &luminance_coefficients)
{
return float(parallel_reduce(
result.domain().size,
0.0,
[&](const int2 texel, double &accumulated_value) {
accumulated_value += math::dot(result.load_pixel<float4>(texel).xyz(),
luminance_coefficients);
},
[&](const double &a, const double &b) { return a + b; }));
}
/* Returns the value computed by sum_luminance_gpu() if the context uses the GPU, and the value
 * computed by sum_luminance_cpu() otherwise. */
float sum_luminance(Context &context, const Result &result, const float3 &luminance_coefficients)
{
  return context.use_gpu() ? sum_luminance_gpu(context, result, luminance_coefficients) :
                             sum_luminance_cpu(result, luminance_coefficients);
}
/* GPU path of sum_log_luminance(). Binds the full precision "compositor_sum_log_luminance"
 * shader, uploads the given luminance coefficients to its "luminance_coefficients" uniform, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision float pixel, and
 * returns the value of that pixel. The pixel buffer handed back by the dispatch is freed here and
 * the shader is unbound before returning. */
static float sum_log_luminance_gpu(Context &context,
                                   const Result &result,
                                   const float3 &luminance_coefficients)
{
  GPUShader *reduction_shader = context.get_shader("compositor_sum_log_luminance",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_3fv(reduction_shader, "luminance_coefficients", luminance_coefficients);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float total = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return total;
}
static float sum_log_luminance_cpu(const Result &result, const float3 &luminance_coefficients)
{
return float(parallel_reduce(
result.domain().size,
0.0,
[&](const int2 texel, double &accumulated_value) {
const float luminance = math::dot(result.load_pixel<float4>(texel).xyz(),
luminance_coefficients);
accumulated_value += std::log(math::max(luminance, 1e-5f));
},
[&](const double &a, const double &b) { return a + b; }));
}
/* Returns the value computed by sum_log_luminance_gpu() if the context uses the GPU, and the
 * value computed by sum_log_luminance_cpu() otherwise. */
float sum_log_luminance(Context &context,
                        const Result &result,
                        const float3 &luminance_coefficients)
{
  return context.use_gpu() ? sum_log_luminance_gpu(context, result, luminance_coefficients) :
                             sum_log_luminance_cpu(result, luminance_coefficients);
}
/* GPU path of sum_color(). Binds the full precision "compositor_sum_color" shader, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision color pixel, and
 * returns a float4 constructed from the floats of that pixel. The pixel buffer handed back by
 * the dispatch is freed here and the shader is unbound before returning. */
static float4 sum_color_gpu(Context &context, const Result &result)
{
  GPUShader *reduction_shader = context.get_shader("compositor_sum_color", ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Color,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float4 total = float4(pixel);
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return total;
}
/* CPU path of sum_color(). Adds up all pixels in the domain of the result component-wise, where
 * pixels are loaded as float4. The sum is accumulated as a double4 and only converted to float4
 * when it is returned. */
static float4 sum_color_cpu(const Result &result)
{
  const auto accumulate_color = [&](const int2 texel, double4 &running_sum) {
    running_sum += double4(result.load_pixel<float4>(texel));
  };
  const auto combine = [](const double4 &lhs, const double4 &rhs) { return lhs + rhs; };

  const double4 total = parallel_reduce(
      result.domain().size, double4(0.0), accumulate_color, combine);
  return float4(total);
}
/* Returns the value computed by sum_color_gpu() if the context uses the GPU, and the value
 * computed by sum_color_cpu() otherwise. */
float4 sum_color(Context &context, const Result &result)
{
  return context.use_gpu() ? sum_color_gpu(context, result) : sum_color_cpu(result);
}
/* --------------------------------------------------------------------
* Sum Of Squared Difference Reductions.
*/
/* GPU path of sum_red_squared_difference(). Binds the full precision
 * "compositor_sum_red_squared_difference" shader, uploads the given subtrahend to its
 * "subtrahend" uniform, lets parallel_reduction_dispatch() reduce the result into a single full
 * precision float pixel, and returns the value of that pixel. The pixel buffer handed back by
 * the dispatch is freed here and the shader is unbound before returning. */
static float sum_red_squared_difference_gpu(Context &context,
                                            const Result &result,
                                            const float subtrahend)
{
  GPUShader *reduction_shader = context.get_shader("compositor_sum_red_squared_difference",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_1f(reduction_shader, "subtrahend", subtrahend);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float total = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return total;
}
static float sum_red_squared_difference_cpu(const Result &result, const float subtrahend)
{
return float(parallel_reduce(
result.domain().size,
0.0,
[&](const int2 texel, double &accumulated_value) {
accumulated_value += math::square(result.load_pixel<float4>(texel).x - subtrahend);
},
[&](const double &a, const double &b) { return a + b; }));
}
/* Returns the value computed by sum_red_squared_difference_gpu() if the context uses the GPU, and
 * the value computed by sum_red_squared_difference_cpu() otherwise. */
float sum_red_squared_difference(Context &context, const Result &result, const float subtrahend)
{
  return context.use_gpu() ? sum_red_squared_difference_gpu(context, result, subtrahend) :
                             sum_red_squared_difference_cpu(result, subtrahend);
}
/* GPU path of sum_green_squared_difference(). Binds the full precision
 * "compositor_sum_green_squared_difference" shader, uploads the given subtrahend to its
 * "subtrahend" uniform, lets parallel_reduction_dispatch() reduce the result into a single full
 * precision float pixel, and returns the value of that pixel. The pixel buffer handed back by
 * the dispatch is freed here and the shader is unbound before returning. */
static float sum_green_squared_difference_gpu(Context &context,
                                              const Result &result,
                                              const float subtrahend)
{
  GPUShader *reduction_shader = context.get_shader("compositor_sum_green_squared_difference",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_1f(reduction_shader, "subtrahend", subtrahend);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float total = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return total;
}
static float sum_green_squared_difference_cpu(const Result &result, const float subtrahend)
{
return float(parallel_reduce(
result.domain().size,
0.0,
[&](const int2 texel, double &accumulated_value) {
accumulated_value += math::square(result.load_pixel<float4>(texel).y - subtrahend);
},
[&](const double &a, const double &b) { return a + b; }));
}
/* Returns the value computed by sum_green_squared_difference_gpu() if the context uses the GPU,
 * and the value computed by sum_green_squared_difference_cpu() otherwise. */
float sum_green_squared_difference(Context &context, const Result &result, const float subtrahend)
{
  return context.use_gpu() ? sum_green_squared_difference_gpu(context, result, subtrahend) :
                             sum_green_squared_difference_cpu(result, subtrahend);
}
/* GPU path of sum_blue_squared_difference(). Binds the full precision
 * "compositor_sum_blue_squared_difference" shader, uploads the given subtrahend to its
 * "subtrahend" uniform, lets parallel_reduction_dispatch() reduce the result into a single full
 * precision float pixel, and returns the value of that pixel. The pixel buffer handed back by
 * the dispatch is freed here and the shader is unbound before returning. */
static float sum_blue_squared_difference_gpu(Context &context,
                                             const Result &result,
                                             const float subtrahend)
{
  GPUShader *reduction_shader = context.get_shader("compositor_sum_blue_squared_difference",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_1f(reduction_shader, "subtrahend", subtrahend);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float total = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return total;
}
static float sum_blue_squared_difference_cpu(const Result &result, const float subtrahend)
{
return float(parallel_reduce(
result.domain().size,
0.0,
[&](const int2 texel, double &accumulated_value) {
accumulated_value += math::square(result.load_pixel<float4>(texel).z - subtrahend);
},
[&](const double &a, const double &b) { return a + b; }));
}
/* Returns the value computed by sum_blue_squared_difference_gpu() if the context uses the GPU,
 * and the value computed by sum_blue_squared_difference_cpu() otherwise. */
float sum_blue_squared_difference(Context &context, const Result &result, const float subtrahend)
{
  return context.use_gpu() ? sum_blue_squared_difference_gpu(context, result, subtrahend) :
                             sum_blue_squared_difference_cpu(result, subtrahend);
}
/* GPU path of sum_luminance_squared_difference(). Binds the full precision
 * "compositor_sum_luminance_squared_difference" shader, uploads the given luminance coefficients
 * and subtrahend to its "luminance_coefficients" and "subtrahend" uniforms, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision float pixel, and
 * returns the value of that pixel. The pixel buffer handed back by the dispatch is freed here and
 * the shader is unbound before returning. */
static float sum_luminance_squared_difference_gpu(Context &context,
                                                  const Result &result,
                                                  const float3 &luminance_coefficients,
                                                  const float subtrahend)
{
  GPUShader *reduction_shader = context.get_shader("compositor_sum_luminance_squared_difference",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_3fv(reduction_shader, "luminance_coefficients", luminance_coefficients);
  GPU_shader_uniform_1f(reduction_shader, "subtrahend", subtrahend);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float total = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return total;
}
static float sum_luminance_squared_difference_cpu(const Result &result,
const float3 &luminance_coefficients,
const float subtrahend)
{
return float(parallel_reduce(
result.domain().size,
0.0,
[&](const int2 texel, double &accumulated_value) {
const float luminance = math::dot(result.load_pixel<float4>(texel).xyz(),
luminance_coefficients);
accumulated_value += math::square(luminance - subtrahend);
},
[&](const double &a, const double &b) { return a + b; }));
}
/* Returns the value computed by sum_luminance_squared_difference_gpu() if the context uses the
 * GPU, and the value computed by sum_luminance_squared_difference_cpu() otherwise. */
float sum_luminance_squared_difference(Context &context,
                                       const Result &result,
                                       const float3 &luminance_coefficients,
                                       const float subtrahend)
{
  return context.use_gpu() ?
             sum_luminance_squared_difference_gpu(
                 context, result, luminance_coefficients, subtrahend) :
             sum_luminance_squared_difference_cpu(result, luminance_coefficients, subtrahend);
}
/* --------------------------------------------------------------------
* Maximum Reductions.
*/
/* GPU path of maximum_luminance(). Binds the full precision "compositor_maximum_luminance"
 * shader, uploads the given luminance coefficients to its "luminance_coefficients" uniform, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision float pixel, and
 * returns the value of that pixel. The pixel buffer handed back by the dispatch is freed here and
 * the shader is unbound before returning. */
static float maximum_luminance_gpu(Context &context,
                                   const Result &result,
                                   const float3 &luminance_coefficients)
{
  GPUShader *reduction_shader = context.get_shader("compositor_maximum_luminance",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_3fv(reduction_shader, "luminance_coefficients", luminance_coefficients);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float largest = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return largest;
}
static float maximum_luminance_cpu(const Result &result, const float3 &luminance_coefficients)
{
return float(parallel_reduce(
result.domain().size,
std::numeric_limits<float>::lowest(),
[&](const int2 texel, float &accumulated_value) {
const float luminance = math::dot(result.load_pixel<float4>(texel).xyz(),
luminance_coefficients);
accumulated_value = math::max(accumulated_value, luminance);
},
[&](const float &a, const float &b) { return math::max(a, b); }));
}
/* Returns the value computed by maximum_luminance_gpu() if the context uses the GPU, and the
 * value computed by maximum_luminance_cpu() otherwise. */
float maximum_luminance(Context &context,
                        const Result &result,
                        const float3 &luminance_coefficients)
{
  return context.use_gpu() ? maximum_luminance_gpu(context, result, luminance_coefficients) :
                             maximum_luminance_cpu(result, luminance_coefficients);
}
/* GPU path of maximum_float(). Binds the full precision "compositor_maximum_float" shader, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision float pixel, and
 * returns the value of that pixel. The pixel buffer handed back by the dispatch is freed here and
 * the shader is unbound before returning. */
static float maximum_float_gpu(Context &context, const Result &result)
{
  GPUShader *reduction_shader = context.get_shader("compositor_maximum_float",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float largest = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return largest;
}
static float maximum_float_cpu(const Result &result)
{
return float(parallel_reduce(
result.domain().size,
std::numeric_limits<float>::lowest(),
[&](const int2 texel, float &accumulated_value) {
accumulated_value = math::max(accumulated_value, result.load_pixel<float>(texel));
},
[&](const float &a, const float &b) { return math::max(a, b); }));
}
/* Returns the value computed by maximum_float_gpu() if the context uses the GPU, and the value
 * computed by maximum_float_cpu() otherwise. */
float maximum_float(Context &context, const Result &result)
{
  return context.use_gpu() ? maximum_float_gpu(context, result) : maximum_float_cpu(result);
}
/* GPU path of maximum_float_in_range(). Binds the full precision
 * "compositor_maximum_float_in_range" shader, uploads the given bounds to its "lower_bound" and
 * "upper_bound" uniforms, lets parallel_reduction_dispatch() reduce the result into a single full
 * precision float pixel, and returns the value of that pixel. The pixel buffer handed back by
 * the dispatch is freed here and the shader is unbound before returning. */
static float maximum_float_in_range_gpu(Context &context,
                                        const Result &result,
                                        const float lower_bound,
                                        const float upper_bound)
{
  GPUShader *reduction_shader = context.get_shader("compositor_maximum_float_in_range",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_1f(reduction_shader, "lower_bound", lower_bound);
  GPU_shader_uniform_1f(reduction_shader, "upper_bound", upper_bound);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float largest = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return largest;
}
static float maximum_float_in_range_cpu(const Result &result,
const float lower_bound,
const float upper_bound)
{
return float(parallel_reduce(
result.domain().size,
lower_bound,
[&](const int2 texel, float &accumulated_value) {
const float value = result.load_pixel<float>(texel);
if ((value <= upper_bound) && (value >= lower_bound)) {
accumulated_value = math::max(accumulated_value, value);
}
},
[&](const float &a, const float &b) { return math::max(a, b); }));
}
/* Returns the value computed by maximum_float_in_range_gpu() if the context uses the GPU, and the
 * value computed by maximum_float_in_range_cpu() otherwise. */
float maximum_float_in_range(Context &context,
                             const Result &result,
                             const float lower_bound,
                             const float upper_bound)
{
  return context.use_gpu() ?
             maximum_float_in_range_gpu(context, result, lower_bound, upper_bound) :
             maximum_float_in_range_cpu(result, lower_bound, upper_bound);
}
/* --------------------------------------------------------------------
* Minimum Reductions.
*/
/* GPU path of minimum_luminance(). Binds the full precision "compositor_minimum_luminance"
 * shader, uploads the given luminance coefficients to its "luminance_coefficients" uniform, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision float pixel, and
 * returns the value of that pixel. The pixel buffer handed back by the dispatch is freed here and
 * the shader is unbound before returning. */
static float minimum_luminance_gpu(Context &context,
                                   const Result &result,
                                   const float3 &luminance_coefficients)
{
  GPUShader *reduction_shader = context.get_shader("compositor_minimum_luminance",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_3fv(reduction_shader, "luminance_coefficients", luminance_coefficients);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float smallest = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return smallest;
}
static float minimum_luminance_cpu(const Result &result, const float3 &luminance_coefficients)
{
return float(parallel_reduce(
result.domain().size,
std::numeric_limits<float>::max(),
[&](const int2 texel, float &accumulated_value) {
const float luminance = math::dot(result.load_pixel<float4>(texel).xyz(),
luminance_coefficients);
accumulated_value = math::min(accumulated_value, luminance);
},
[&](const float &a, const float &b) { return math::min(a, b); }));
}
/* Returns the value computed by minimum_luminance_gpu() if the context uses the GPU, and the
 * value computed by minimum_luminance_cpu() otherwise. */
float minimum_luminance(Context &context,
                        const Result &result,
                        const float3 &luminance_coefficients)
{
  return context.use_gpu() ? minimum_luminance_gpu(context, result, luminance_coefficients) :
                             minimum_luminance_cpu(result, luminance_coefficients);
}
/* GPU path of minimum_float(). Binds the full precision "compositor_minimum_float" shader, lets
 * parallel_reduction_dispatch() reduce the result into a single full precision float pixel, and
 * returns the value of that pixel. The pixel buffer handed back by the dispatch is freed here and
 * the shader is unbound before returning. */
static float minimum_float_gpu(Context &context, const Result &result)
{
  GPUShader *reduction_shader = context.get_shader("compositor_minimum_float",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float smallest = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return smallest;
}
static float minimum_float_cpu(const Result &result)
{
return float(parallel_reduce(
result.domain().size,
std::numeric_limits<float>::max(),
[&](const int2 texel, float &accumulated_value) {
accumulated_value = math::min(accumulated_value, result.load_pixel<float>(texel));
},
[&](const float &a, const float &b) { return math::min(a, b); }));
}
/* Returns the value computed by minimum_float_gpu() if the context uses the GPU, and the value
 * computed by minimum_float_cpu() otherwise. */
float minimum_float(Context &context, const Result &result)
{
  return context.use_gpu() ? minimum_float_gpu(context, result) : minimum_float_cpu(result);
}
/* GPU path of minimum_float_in_range(). Binds the full precision
 * "compositor_minimum_float_in_range" shader, uploads the given bounds to its "lower_bound" and
 * "upper_bound" uniforms, lets parallel_reduction_dispatch() reduce the result into a single full
 * precision float pixel, and returns the value of that pixel. The pixel buffer handed back by
 * the dispatch is freed here and the shader is unbound before returning. */
static float minimum_float_in_range_gpu(Context &context,
                                        const Result &result,
                                        const float lower_bound,
                                        const float upper_bound)
{
  GPUShader *reduction_shader = context.get_shader("compositor_minimum_float_in_range",
                                                   ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_1f(reduction_shader, "lower_bound", lower_bound);
  GPU_shader_uniform_1f(reduction_shader, "upper_bound", upper_bound);

  const auto reduced_format = Result::gpu_texture_format(ResultType::Float,
                                                         ResultPrecision::Full);
  float *pixel = parallel_reduction_dispatch(result, reduction_shader, reduced_format);
  const float smallest = pixel[0];
  MEM_freeN(pixel);

  GPU_shader_unbind();
  return smallest;
}
static float minimum_float_in_range_cpu(const Result &result,
const float lower_bound,
const float upper_bound)
{
return parallel_reduce(
result.domain().size,
upper_bound,
[&](const int2 texel, float &accumulated_value) {
const float value = result.load_pixel<float>(texel);
if ((value <= upper_bound) && (value >= lower_bound)) {
accumulated_value = math::min(accumulated_value, value);
}
},
[&](const float &a, const float &b) { return math::min(a, b); });
}
/* Returns the value computed by minimum_float_in_range_gpu() if the context uses the GPU, and the
 * value computed by minimum_float_in_range_cpu() otherwise. */
float minimum_float_in_range(Context &context,
                             const Result &result,
                             const float lower_bound,
                             const float upper_bound)
{
  return context.use_gpu() ?
             minimum_float_in_range_gpu(context, result, lower_bound, upper_bound) :
             minimum_float_in_range_cpu(result, lower_bound, upper_bound);
}
} // namespace blender::compositor