Compositor: Implement Summed Area Table for new CPU compositor
Reference #125968.
This commit is contained in:
@@ -4,6 +4,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "BLI_math_vector.hh"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
|
||||
#include "COM_context.hh"
|
||||
#include "COM_result.hh"
|
||||
|
||||
@@ -26,4 +29,52 @@ void summed_area_table(Context &context,
|
||||
Result &output,
|
||||
SummedAreaTableOperation operation = SummedAreaTableOperation::Identity);
|
||||
|
||||
/* Computes the sum of the rectangular region defined by the given lower and upper bounds from the
|
||||
* given summed area table. It is assumed that the given upper bound is larger than the given lower
|
||||
* bound, otherwise, undefined behavior is invoked. Looking at the diagram below, in order to
|
||||
* compute the sum of area X, we sample the table at each of the corners of the area X, to get:
|
||||
*
|
||||
* Upper Right -> A + B + C + X (1)
|
||||
* Upper Left -> A + B (2)
|
||||
* Lower Right -> B + C (3)
|
||||
* Lower Left -> B (4)
|
||||
*
|
||||
* We start from (1) and subtract (2) and (3) to get rid of A and C to get:
|
||||
*
|
||||
* (A + B + C + X) - (A + B) - (B + C) = (X - B)
|
||||
*
|
||||
* To get rid of B, we add (4) to get:
|
||||
*
|
||||
* (X - B) + B = X
|
||||
*
|
||||
* ^
|
||||
* |
|
||||
* +-------+-----+
|
||||
* | | |
|
||||
* | A | X |
|
||||
* | | |
|
||||
* +-------+-----+
|
||||
* | | |
|
||||
* | B | C |
|
||||
* | | |
|
||||
* o-------+-----+------>
|
||||
*
|
||||
* The aforementioned equation eliminates the edges between regions X, C, and A since they get
|
||||
* subtracted with C and A. To avoid this, we subtract 1 from the lower bound and fallback to zero
|
||||
* for out of bound sampling. */
|
||||
inline float4 summed_area_table_sum(const Result &table,
|
||||
const int2 &lower_bound,
|
||||
const int2 &upper_bound)
|
||||
{
|
||||
int2 corrected_lower_bound = lower_bound - int2(1);
|
||||
int2 corrected_upper_bound = math::min(table.domain().size - int2(1), upper_bound);
|
||||
float4 addend = table.load_pixel_fallback(corrected_upper_bound, float4(0.0f)) +
|
||||
table.load_pixel_fallback(corrected_lower_bound, float4(0.0f));
|
||||
float4 subtrahend = table.load_pixel_fallback(
|
||||
int2(corrected_lower_bound.x, corrected_upper_bound.y), float4(0.0f)) +
|
||||
table.load_pixel_fallback(
|
||||
int2(corrected_upper_bound.x, corrected_lower_bound.y), float4(0.0f));
|
||||
return addend - subtrahend;
|
||||
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
||||
|
||||
@@ -3,9 +3,11 @@
|
||||
* SPDX-License-Identifier: GPL-2.0-or-later */
|
||||
|
||||
#include "BLI_assert.h"
|
||||
#include "BLI_index_range.hh"
|
||||
#include "BLI_math_base.hh"
|
||||
#include "BLI_math_vector.hh"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
#include "BLI_task.hh"
|
||||
|
||||
#include "GPU_compute.hh"
|
||||
#include "GPU_shader.hh"
|
||||
@@ -199,10 +201,10 @@ static void compute_complete_blocks(Context &context,
|
||||
output.unbind_as_image();
|
||||
}
|
||||
|
||||
void summed_area_table(Context &context,
|
||||
Result &input,
|
||||
Result &output,
|
||||
SummedAreaTableOperation operation)
|
||||
static void summed_area_table_gpu(Context &context,
|
||||
Result &input,
|
||||
Result &output,
|
||||
SummedAreaTableOperation operation)
|
||||
{
|
||||
Result incomplete_x_prologues = context.create_result(ResultType::Color, ResultPrecision::Full);
|
||||
Result incomplete_y_prologues = context.create_result(ResultType::Color, ResultPrecision::Full);
|
||||
@@ -228,4 +230,53 @@ void summed_area_table(Context &context,
|
||||
complete_y_prologues.release();
|
||||
}
|
||||
|
||||
/* Computes the summed area table as a cascade of a horizontal summing pass followed by a vertical
|
||||
* summing pass. */
|
||||
static void summed_area_table_cpu(Result &input,
|
||||
Result &output,
|
||||
SummedAreaTableOperation operation)
|
||||
{
|
||||
output.allocate_texture(input.domain());
|
||||
|
||||
/* Horizontal summing pass. */
|
||||
const int2 size = input.domain().size;
|
||||
threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange range_y) {
|
||||
for (const int y : range_y) {
|
||||
float4 accumulated_color = float4(0.0f);
|
||||
for (const int x : IndexRange(size.x)) {
|
||||
const int2 texel = int2(x, y);
|
||||
const float4 color = input.load_pixel(texel);
|
||||
accumulated_color += operation == SummedAreaTableOperation::Square ? color * color : color;
|
||||
output.store_pixel(texel, accumulated_color);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
/* Vertical summing pass. */
|
||||
threading::parallel_for(IndexRange(size.x), 1, [&](const IndexRange range_x) {
|
||||
for (const int x : range_x) {
|
||||
float4 accumulated_color = float4(0.0f);
|
||||
for (const int y : IndexRange(size.y)) {
|
||||
const int2 texel = int2(x, y);
|
||||
const float4 color = output.load_pixel(texel);
|
||||
accumulated_color += color;
|
||||
output.store_pixel(texel, accumulated_color);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/* Computes the summed area table of the input into the output, dispatching to the GPU or the CPU
 * implementation depending on whether the context uses the GPU. */
void summed_area_table(Context &context,
                       Result &input,
                       Result &output,
                       SummedAreaTableOperation operation)
{
  if (!context.use_gpu()) {
    summed_area_table_cpu(input, output, operation);
    return;
  }

  summed_area_table_gpu(context, input, output, operation);
}
|
||||
|
||||
} // namespace blender::realtime_compositor
|
||||
|
||||
Reference in New Issue
Block a user