Compositor: Implement Dilate node for new CPU compositor
Reference #125968.
This commit is contained in:
@@ -6,6 +6,8 @@
|
||||
* \ingroup cmpnodes
|
||||
*/
|
||||
|
||||
#include <limits>
|
||||
|
||||
#include "BLI_assert.h"
|
||||
#include "BLI_math_base.hh"
|
||||
#include "BLI_math_vector_types.hh"
|
||||
@@ -68,17 +70,6 @@ class DilateErodeOperation : public NodeOperation {
|
||||
|
||||
void execute() override
|
||||
{
|
||||
/* Not yet supported on CPU. */
|
||||
if (!context().use_gpu()) {
|
||||
for (const bNodeSocket *output : this->node()->output_sockets()) {
|
||||
Result &output_result = get_result(output->identifier);
|
||||
if (output_result.should_compute()) {
|
||||
output_result.allocate_invalid();
|
||||
}
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_identity()) {
|
||||
get_input("Mask").pass_through(get_result("Mask"));
|
||||
return;
|
||||
@@ -115,6 +106,14 @@ class DilateErodeOperation : public NodeOperation {
|
||||
}
|
||||
|
||||
/* Runs the horizontal pass of the step method on the device the context is configured to use
 * and returns the result produced by that pass. */
Result execute_step_horizontal_pass()
{
  if (!this->context().use_gpu()) {
    return this->execute_step_horizontal_pass_cpu();
  }

  return this->execute_step_horizontal_pass_gpu();
}
|
||||
|
||||
Result execute_step_horizontal_pass_gpu()
|
||||
{
|
||||
GPUShader *shader = context().get_shader(get_morphological_step_shader_name());
|
||||
GPU_shader_bind(shader);
|
||||
@@ -149,7 +148,40 @@ class DilateErodeOperation : public NodeOperation {
|
||||
return horizontal_pass_result;
|
||||
}
|
||||
|
||||
/* CPU implementation of the horizontal pass of the step method. Returns a newly allocated
 * intermediate result that holds the processed mask, stored transposed. */
Result execute_step_horizontal_pass_cpu()
{
  /* The intermediate result is allocated with swapped dimensions, that is, its height equals the
   * width of the input and vice versa. The pass processes the image along the horizontal axis
   * and writes each pixel to the transposed location. The vertical pass then runs the very same
   * routine on this transposed intermediate, which effectively processes the vertical axis, and
   * writing transposed once more undoes the transposition. This is done as a performance
   * optimization for better spatial cache locality and to avoid maintaining two separate
   * implementations for the two passes. */
  const Domain domain = compute_domain();
  const int2 transposed_size = int2(domain.size.y, domain.size.x);

  Result transposed_result = context().create_result(ResultType::Color);
  transposed_result.allocate_texture(transposed_size);

  const Result &mask = get_input("Mask");
  this->execute_step_pass_cpu(mask, transposed_result);

  return transposed_result;
}
|
||||
|
||||
/* Runs the vertical pass of the step method on the device the context is configured to use,
 * taking the result of the horizontal pass as its input. */
void execute_step_vertical_pass(Result &horizontal_pass_result)
{
  if (!this->context().use_gpu()) {
    this->execute_step_vertical_pass_cpu(horizontal_pass_result);
    return;
  }

  this->execute_step_vertical_pass_gpu(horizontal_pass_result);
}
|
||||
|
||||
void execute_step_vertical_pass_gpu(Result &horizontal_pass_result)
|
||||
{
|
||||
GPUShader *shader = context().get_shader(get_morphological_step_shader_name());
|
||||
GPU_shader_bind(shader);
|
||||
@@ -173,6 +205,55 @@ class DilateErodeOperation : public NodeOperation {
|
||||
output_mask.unbind_as_image();
|
||||
}
|
||||
|
||||
/* CPU implementation of the vertical pass of the step method. Allocates the "Mask" output on the
 * operation domain and fills it by running the shared CPU pass on the horizontal pass result. */
void execute_step_vertical_pass_cpu(Result &horizontal_pass_result)
{
  const Domain operation_domain = compute_domain();

  Result &mask_result = get_result("Mask");
  mask_result.allocate_texture(operation_domain);

  this->execute_step_pass_cpu(horizontal_pass_result, mask_result);
}
|
||||
|
||||
void execute_step_pass_cpu(const Result &input, Result &output)
|
||||
{
|
||||
/* We have specialized code for each sign, so use the absolute value. */
|
||||
const int radius = math::abs(this->get_distance());
|
||||
|
||||
/* Notice that the size is transposed, see the note on the horizontal pass method for more
|
||||
* information on the reasoning behind this. */
|
||||
const int2 size = int2(output.domain().size.y, output.domain().size.x);
|
||||
if (this->get_distance() > 0) {
|
||||
parallel_for(size, [&](const int2 texel) {
|
||||
/* Find the maximum value in the window of the given radius around the pixel. This
|
||||
* is essentially a morphological dilate operator with a square structuring element. */
|
||||
const float limit = std::numeric_limits<float>::lowest();
|
||||
float value = limit;
|
||||
for (int i = -radius; i <= radius; i++) {
|
||||
value = math::max(value, input.load_pixel_fallback(texel + int2(i, 0), float4(limit)).x);
|
||||
}
|
||||
|
||||
/* Write the value using the transposed texel. See the horizontal pass method
|
||||
* for more information on the rational behind this. */
|
||||
output.store_pixel(int2(texel.y, texel.x), float4(value));
|
||||
});
|
||||
}
|
||||
else {
|
||||
parallel_for(size, [&](const int2 texel) {
|
||||
/* Find the minimum value in the window of the given radius around the pixel. This
|
||||
* is essentially a morphological erode operator with a square structuring element. */
|
||||
const float limit = std::numeric_limits<float>::max();
|
||||
float value = limit;
|
||||
for (int i = -radius; i <= radius; i++) {
|
||||
value = math::min(value, input.load_pixel_fallback(texel + int2(i, 0), float4(limit)).x);
|
||||
}
|
||||
|
||||
/* Write the value using the transposed texel. See the horizontal pass method
|
||||
* for more information on the rational behind this. */
|
||||
output.store_pixel(int2(texel.y, texel.x), float4(value));
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
const char *get_morphological_step_shader_name()
|
||||
{
|
||||
if (get_distance() > 0) {
|
||||
@@ -195,6 +276,29 @@ class DilateErodeOperation : public NodeOperation {
|
||||
* ------------------------------------------ */
|
||||
|
||||
void execute_distance_threshold()
|
||||
{
|
||||
Result output_mask = context().create_result(ResultType::Float);
|
||||
|
||||
if (this->context().use_gpu()) {
|
||||
this->execute_distance_threshold_gpu(output_mask);
|
||||
}
|
||||
else {
|
||||
this->execute_distance_threshold_cpu(output_mask);
|
||||
}
|
||||
|
||||
/* For configurations where there is little user-specified inset, anti-alias the result for
|
||||
* smoother edges. */
|
||||
Result &output = this->get_result("Mask");
|
||||
if (this->get_inset() < 2.0f) {
|
||||
smaa(this->context(), output_mask, output);
|
||||
output_mask.release();
|
||||
}
|
||||
else {
|
||||
output.steal_data(output_mask);
|
||||
}
|
||||
}
|
||||
|
||||
void execute_distance_threshold_gpu(Result &output)
|
||||
{
|
||||
GPUShader *shader = context().get_shader("compositor_morphological_distance_threshold");
|
||||
GPU_shader_bind(shader);
|
||||
@@ -207,26 +311,114 @@ class DilateErodeOperation : public NodeOperation {
|
||||
input_mask.bind_as_texture(shader, "input_tx");
|
||||
|
||||
const Domain domain = compute_domain();
|
||||
Result output_mask = context().create_result(ResultType::Float);
|
||||
output_mask.allocate_texture(domain);
|
||||
output_mask.bind_as_image(shader, "output_img");
|
||||
output.allocate_texture(domain);
|
||||
output.bind_as_image(shader, "output_img");
|
||||
|
||||
compute_dispatch_threads_at_least(shader, domain.size);
|
||||
|
||||
GPU_shader_unbind();
|
||||
output_mask.unbind_as_image();
|
||||
output.unbind_as_image();
|
||||
input_mask.unbind_as_texture();
|
||||
}
|
||||
|
||||
/* For configurations where there is little user-specified inset, anti-alias the result for
|
||||
* smoother edges. */
|
||||
Result &output = get_result("Mask");
|
||||
if (get_inset() < 2.0f) {
|
||||
smaa(context(), output_mask, output);
|
||||
output_mask.release();
|
||||
}
|
||||
else {
|
||||
output.steal_data(output_mask);
|
||||
}
|
||||
/* CPU implementation of the distance threshold method. Allocates the given output on the
 * operation domain and writes the thresholded, dilated/eroded and inset mask computed from the
 * "Mask" input into it. */
void execute_distance_threshold_cpu(Result &output)
{
  const Result &input = get_input("Mask");

  const Domain domain = compute_domain();
  output.allocate_texture(domain);

  /* The inset is used as a divisor below, so it is kept strictly positive. */
  const float inset = math::max(this->get_inset(), 10e-6f);
  const int radius = this->get_morphological_distance_threshold_radius();
  const int distance = this->get_distance();

  /* The Morphological Distance Threshold operation is effectively three consecutive operations
   * implemented as a single operation. The three operations are as follows:
   *
   * .-----------.   .--------------.   .----------------.
   * | Threshold |-->| Dilate/Erode |-->| Distance Inset |
   * '-----------'   '--------------'   '----------------'
   *
   * The threshold operation just converts the input into a binary image, where the pixel is 1 if
   * it is larger than 0.5 and 0 otherwise. Pixels that are 1 in the output of the threshold
   * operation are said to be masked. The dilate/erode operation is a dilate or erode
   * morphological operation with a circular structuring element depending on the sign of the
   * distance, where it is a dilate operation if the distance is positive and an erode operation
   * otherwise. This is equivalent to the Morphological Distance operation, see its
   * implementation for more information. Finally, the distance inset is an operation that
   * converts the binary image into a narrow band distance field. That is, pixels that are
   * unmasked will remain 0, while pixels that are masked will start from zero at the boundary of
   * the masked region and linearly increase until reaching 1 in the span of a number of pixels
   * given by the inset value.
   *
   * As a performance optimization, the dilate/erode operation is omitted and its effective
   * result is achieved by slightly adjusting the distance inset operation. The base distance
   * inset operation works by computing the signed distance from the current center pixel to the
   * nearest pixel with a different value. Since our image is a binary image, that means that if
   * the pixel is masked, we compute the signed distance to the nearest unmasked pixel, and if
   * the pixel is unmasked, we compute the signed distance to the nearest masked pixel. The
   * distance is positive if the pixel is masked and negative otherwise. The distance is then
   * normalized by dividing by the given inset value and clamped to the [0, 1] range. Since
   * distances larger than the inset value are eventually clamped, the distance search window is
   * limited to a radius equivalent to the inset value.
   *
   * To achieve the effective result of the omitted dilate/erode operation, we adjust the
   * distance inset operation as follows. First, we increase the radius of the distance search
   * window by the radius of the dilate/erode operation. Then we adjust the resulting narrow band
   * signed distance field as follows.
   *
   * For the erode case, we merely subtract the erode distance, which makes the outermost erode
   * distance number of pixels zero due to clamping, consequently achieving the result of the
   * erode, while retaining the needed inset because we increased the distance search window by
   * the same amount we subtracted.
   *
   * Similarly, for the dilate case, we add the dilate distance, which makes the dilate distance
   * number of pixels just outside of the masked region positive and part of the narrow band
   * distance field, consequently achieving the result of the dilate, while at the same time, the
   * innermost dilate distance number of pixels become 1 due to clamping, retaining the needed
   * inset because we increased the distance search window by the same amount we added.
   *
   * Since the erode/dilate distance is already signed appropriately as described before, we just
   * add it in both cases. */
  parallel_for(domain.size, [&](const int2 texel) {
    /* Threshold the center pixel, the threshold is currently hard-coded at 0.5. Pixels whose
     * value is larger than the threshold are said to be masked. */
    const bool center_masked = input.load_pixel(texel).x > 0.5f;

    /* The search window reaches outside of the bounds of the image, so samples are loaded with
     * a fallback value. Out of bounds samples should not affect the result, so the fallback is
     * chosen to have the same masked state as the center pixel, which makes the comparison in
     * the loop below skip them. */
    const float4 fallback = float4(center_masked ? 1.0f : 0.0f);

    /* The search window is limited to the given radius, so the largest possible squared
     * distance to the center is double the squared radius. Start the search from it. */
    int nearest_squared_distance = radius * radius * 2;

    /* Search the window for the nearest pixel whose masked state differs from the center. */
    for (int y = -radius; y <= radius; y++) {
      for (int x = -radius; x <= radius; x++) {
        const bool sample_masked =
            input.load_pixel_fallback(texel + int2(x, y), fallback).x > 0.5f;
        if (sample_masked == center_masked) {
          continue;
        }
        nearest_squared_distance = math::min(nearest_squared_distance, x * x + y * y);
      }
    }

    /* Turn the squared distance into an actual distance that is positive inside the masked
     * region and negative outside of it. */
    const float sign = center_masked ? 1.0f : -1.0f;
    const float signed_distance = math::sqrt(float(nearest_squared_distance)) * sign;

    /* Add the erode/dilate distance and normalize by the inset as described in the discussion
     * above, then clamp to the [0, 1] range. */
    const float value = math::clamp((signed_distance + distance) / inset, 0.0f, 1.0f);

    output.store_pixel(texel, float4(value));
  });
}
|
||||
|
||||
/* See the discussion in the implementation for more information. */
|
||||
@@ -282,7 +474,7 @@ class DilateErodeOperation : public NodeOperation {
|
||||
|
||||
/* Returns the dilate/erode method of the node, which is read from the custom1 member of the
 * node and converted to the method enumeration. */
CMPNodeDilateErodeMethod get_method()
{
  /* A single named cast replaces the former C-style cast, which was followed by a second,
   * unreachable return statement. */
  return static_cast<CMPNodeDilateErodeMethod>(bnode().custom1);
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user