Compositor: Implement Summed Area Table for new CPU compositor

Reference #125968.
This commit is contained in:
Omar Emara
2024-11-20 15:57:39 +02:00
parent f4767bea72
commit 71e971700c
2 changed files with 106 additions and 4 deletions

View File

@@ -4,6 +4,9 @@
#pragma once
#include "BLI_math_vector.hh"
#include "BLI_math_vector_types.hh"
#include "COM_context.hh"
#include "COM_result.hh"
@@ -26,4 +29,52 @@ void summed_area_table(Context &context,
Result &output,
SummedAreaTableOperation operation = SummedAreaTableOperation::Identity);
/* Computes the sum of the rectangular region defined by the given lower and upper bounds from the
* given summed area table. It is assumed that the given upper bound is larger than the given lower
* bound, otherwise, undefined behavior is invoked. Looking at the diagram below, in order to
* compute the sum of area X, we sample the table at each of the corners of the area X, to get:
*
* Upper Right -> A + B + C + X (1)
* Upper Left -> A + B (2)
* Lower Right -> B + C (3)
* Lower Left -> B (4)
*
* We start from (1) and subtract (2) and (3) to get rid of A and C to get:
*
* (A + B + C + X) - (A + B) - (B + C) = (X - B)
*
* To get rid of B, we add (4) to get:
*
* (X - B) + B = X
*
* ^
* |
* +-------+-----+
* |       |     |
* |   A   |  X  |
* |       |     |
* +-------+-----+
* |       |     |
* |   B   |  C  |
* |       |     |
* o-------+-----+------>
*
* Because the table is sampled at discrete pixels, the aforementioned equation excludes the pixels
* of X that lie on its edges with A and C, since those pixels are part of the sums that get
* subtracted for A and C. To avoid this, we subtract 1 from the lower bound and fallback to zero
* for out of bound sampling. */
inline float4 summed_area_table_sum(const Result &table,
                                    const int2 &lower_bound,
                                    const int2 &upper_bound)
{
  /* Step the lower corner back by one pixel so that the pixels on the lower and left edges of the
   * region take part in the sum, and keep the upper corner within the size of the table. */
  const int2 lower = lower_bound - int2(1);
  const int2 upper = math::min(table.domain().size - int2(1), upper_bound);

  /* Sample the table at the four corners of the region, falling back to zero for each load. */
  const float4 zero = float4(0.0f);
  const float4 upper_right = table.load_pixel_fallback(upper, zero);
  const float4 lower_left = table.load_pixel_fallback(lower, zero);
  const float4 upper_left = table.load_pixel_fallback(int2(lower.x, upper.y), zero);
  const float4 lower_right = table.load_pixel_fallback(int2(upper.x, lower.y), zero);

  /* Upper right and lower left are added, while upper left and lower right are subtracted. */
  return (upper_right + lower_left) - (upper_left + lower_right);
}
} // namespace blender::realtime_compositor

View File

@@ -3,9 +3,11 @@
* SPDX-License-Identifier: GPL-2.0-or-later */
#include "BLI_assert.h"
#include "BLI_index_range.hh"
#include "BLI_math_base.hh"
#include "BLI_math_vector.hh"
#include "BLI_math_vector_types.hh"
#include "BLI_task.hh"
#include "GPU_compute.hh"
#include "GPU_shader.hh"
@@ -199,10 +201,10 @@ static void compute_complete_blocks(Context &context,
output.unbind_as_image();
}
void summed_area_table(Context &context,
Result &input,
Result &output,
SummedAreaTableOperation operation)
static void summed_area_table_gpu(Context &context,
Result &input,
Result &output,
SummedAreaTableOperation operation)
{
Result incomplete_x_prologues = context.create_result(ResultType::Color, ResultPrecision::Full);
Result incomplete_y_prologues = context.create_result(ResultType::Color, ResultPrecision::Full);
@@ -228,4 +230,53 @@ void summed_area_table(Context &context,
complete_y_prologues.release();
}
/* CPU implementation of the summed area table. The table is built in two passes that both write
 * to the output: first a running sum along every row of the input, then a running sum along every
 * column of the result of the first pass. If the operation is Square, the input values are squared
 * before they are summed. */
static void summed_area_table_cpu(Result &input,
                                  Result &output,
                                  SummedAreaTableOperation operation)
{
  output.allocate_texture(input.domain());

  const int2 size = input.domain().size;
  const bool square_values = operation == SummedAreaTableOperation::Square;

  /* Row pass: every row only reads and writes its own pixels, so the rows are distributed over
   * the parallel tasks. */
  threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange rows) {
    for (const int y : rows) {
      float4 row_sum = float4(0.0f);
      for (const int x : IndexRange(size.x)) {
        const int2 texel = int2(x, y);
        float4 value = input.load_pixel(texel);
        if (square_values) {
          value = value * value;
        }
        row_sum += value;
        output.store_pixel(texel, row_sum);
      }
    }
  });

  /* Column pass: accumulate the row sums already stored in the output along each column, writing
   * the result back in place. Every column only touches its own pixels. */
  threading::parallel_for(IndexRange(size.x), 1, [&](const IndexRange columns) {
    for (const int x : columns) {
      float4 column_sum = float4(0.0f);
      for (const int y : IndexRange(size.y)) {
        const int2 texel = int2(x, y);
        column_sum += output.load_pixel(texel);
        output.store_pixel(texel, column_sum);
      }
    }
  });
}
/* Computes the summed area table of the input into the output, dispatching to the GPU or the CPU
 * implementation depending on whether the context uses the GPU. */
void summed_area_table(Context &context,
                       Result &input,
                       Result &output,
                       SummedAreaTableOperation operation)
{
  /* The CPU implementation does not take the context. */
  if (!context.use_gpu()) {
    summed_area_table_cpu(input, output, operation);
    return;
  }

  summed_area_table_gpu(context, input, output, operation);
}
} // namespace blender::realtime_compositor