The Bloom mode of the Glare node crashes if the input image is too small. This is because bloom is computed using a down-sampling chain followed by an up-sampling chain, and if the user-supplied size is not the maximum, the computed chain length might be zero or negative, which was not handled gracefully. To fix this, we simply sanitize the chain length. Pull Request: https://projects.blender.org/blender/blender/pulls/124089

327 lines · 17 KiB · C++
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
#include <cmath>
|
|
#include <cstring>
|
|
#include <memory>
|
|
|
|
#include "BLI_array.hh"
|
|
#include "BLI_index_range.hh"
|
|
#include "BLI_math_base.hh"
|
|
#include "BLI_math_vector.h"
|
|
#include "BLI_math_vector.hh"
|
|
#include "BLI_task.hh"
|
|
|
|
#include "COM_GlareBloomOperation.h"
|
|
|
|
#define MAX_GLARE_SIZE 9
|
|
|
|
namespace blender::compositor {
|
|
|
|
static void upsample(const MemoryBuffer &input, MemoryBuffer &output)
|
|
{
|
|
const int2 output_size = int2(output.get_width(), output.get_height());
|
|
|
|
/* All the offsets in the following code section are in the normalized pixel space of the output
|
|
* image, so compute its normalized pixel size. */
|
|
float2 pixel_size = 1.0f / float2(output_size);
|
|
|
|
threading::parallel_for(IndexRange(output_size.y), 1, [&](const IndexRange sub_y_range) {
|
|
for (const int64_t y : sub_y_range) {
|
|
for (const int64_t x : IndexRange(output_size.x)) {
|
|
/* Each invocation corresponds to one output pixel, where the output has twice the size of
|
|
* the input. */
|
|
int2 texel = int2(x, y);
|
|
|
|
/* Add 0.5 to evaluate the buffer at the center of the pixel and divide by the image size
|
|
* to get the coordinates into the buffer's expected [0, 1] range. */
|
|
float2 coordinates = (float2(texel) + float2(0.5)) / float2(output_size);
|
|
|
|
/* Upsample by applying a 3x3 tent filter on the bi-linearly interpolated values evaluated
|
|
* at the center of neighboring output pixels. As more tent filter upsampling passes are
|
|
* applied, the result approximates a large sized Gaussian filter. This upsampling strategy
|
|
* is described in the talk:
|
|
*
|
|
* Next Generation Post Processing in Call of Duty: Advanced Warfare
|
|
* https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
|
|
*
|
|
* In particular, the upsampling strategy is described and illustrated in slide 162 titled
|
|
* "Upsampling - Our Solution". */
|
|
float4 upsampled = float4(0.0f);
|
|
upsampled += (4.0f / 16.0f) * input.texture_bilinear_extend(coordinates);
|
|
upsampled += (2.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(-1.0f, 0.0f));
|
|
upsampled += (2.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(0.0f, 1.0f));
|
|
upsampled += (2.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(1.0f, 0.0f));
|
|
upsampled += (2.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(0.0f, -1.0f));
|
|
upsampled += (1.0f / 16.0f) * input.texture_bilinear_extend(
|
|
coordinates + pixel_size * float2(-1.0f, -1.0f));
|
|
upsampled += (1.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(-1.0f, 1.0f));
|
|
upsampled += (1.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(1.0f, -1.0f));
|
|
upsampled += (1.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(1.0f, 1.0f));
|
|
|
|
const float4 original_value = output.get_elem(texel.x, texel.y);
|
|
copy_v4_v4(output.get_elem(texel.x, texel.y), original_value + upsampled);
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
/* Computes the weighted average of the given four colors, which are assumed to the colors of
|
|
* spatially neighboring pixels. The weights are computed so as to reduce the contributions of
|
|
* fireflies on the result by applying a form of local tone mapping as described by Brian Karis in
|
|
* the article "Graphic Rants: Tone Mapping".
|
|
*
|
|
* https://graphicrants.blogspot.com/2013/12/tone-mapping.html */
|
|
static float4 karis_brightness_weighted_sum(float4 color1,
|
|
float4 color2,
|
|
float4 color3,
|
|
float4 color4)
|
|
{
|
|
const float4 brightness = float4(math::reduce_max(color1.xyz()),
|
|
math::reduce_max(color2.xyz()),
|
|
math::reduce_max(color3.xyz()),
|
|
math::reduce_max(color4.xyz()));
|
|
const float4 weights = 1.0f / (brightness + 1.0);
|
|
const float weights_sum = math::reduce_add(weights);
|
|
const float4 sum = color1 * weights[0] + color2 * weights[1] + color3 * weights[2] +
|
|
color4 * weights[3];
|
|
return math::safe_divide(sum, weights_sum);
|
|
}
|
|
|
|
/* Halves the size of the input into the output using a 13-tap down-sampling filter. When
 * use_karis_average is true, each group of taps is first reduced using a Karis brightness
 * weighted sum to suppress fireflies; this is only needed for the first down-sampling pass. */
static void downsample(const MemoryBuffer &input, MemoryBuffer &output, bool use_karis_average)
{
  const int2 input_size = int2(input.get_width(), input.get_height());
  const int2 output_size = int2(output.get_width(), output.get_height());

  /* All the offsets in the following code section are in the normalized pixel space of the
   * input image, so compute its normalized pixel size. */
  float2 pixel_size = 1.0f / float2(input_size);

  threading::parallel_for(IndexRange(output_size.y), 1, [&](const IndexRange sub_y_range) {
    for (const int64_t y : sub_y_range) {
      for (const int64_t x : IndexRange(output_size.x)) {
        /* Each invocation corresponds to one output pixel, where the output has half the size of
         * the input. */
        int2 texel = int2(x, y);

        /* Add 0.5 to evaluate the buffer at the center of the pixel and divide by the image size
         * to get the coordinates into the buffer's expected [0, 1] range. */
        float2 coordinates = (float2(texel) + float2(0.5f)) / float2(output_size);

        /* Each invocation downsamples a 6x6 area of pixels around the center of the corresponding
         * output pixel, but instead of sampling each of the 36 pixels in the area, we only sample
         * 13 positions using bilinear fetches at the center of a number of overlapping square
         * 4-pixel groups. This downsampling strategy is described in the talk:
         *
         * Next Generation Post Processing in Call of Duty: Advanced Warfare
         * https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
         *
         * In particular, the downsampling strategy is described and illustrated in slide 153
         * titled "Downsampling - Our Solution". This is employed as it significantly improves the
         * stability of the glare as can be seen in the videos in the talk. */
        float4 center = input.texture_bilinear_extend(coordinates);
        float4 upper_left_near = input.texture_bilinear_extend(coordinates +
                                                               pixel_size * float2(-1.0f, 1.0f));
        float4 upper_right_near = input.texture_bilinear_extend(coordinates +
                                                                pixel_size * float2(1.0f, 1.0f));
        float4 lower_left_near = input.texture_bilinear_extend(coordinates +
                                                               pixel_size * float2(-1.0f, -1.0f));
        float4 lower_right_near = input.texture_bilinear_extend(coordinates +
                                                                pixel_size * float2(1.0f, -1.0f));
        float4 left_far = input.texture_bilinear_extend(coordinates +
                                                        pixel_size * float2(-2.0f, 0.0f));
        float4 right_far = input.texture_bilinear_extend(coordinates +
                                                         pixel_size * float2(2.0f, 0.0f));
        float4 upper_far = input.texture_bilinear_extend(coordinates +
                                                         pixel_size * float2(0.0f, 2.0f));
        float4 lower_far = input.texture_bilinear_extend(coordinates +
                                                         pixel_size * float2(0.0f, -2.0f));
        float4 upper_left_far = input.texture_bilinear_extend(coordinates +
                                                              pixel_size * float2(-2.0f, 2.0f));
        float4 upper_right_far = input.texture_bilinear_extend(coordinates +
                                                               pixel_size * float2(2.0f, 2.0f));
        float4 lower_left_far = input.texture_bilinear_extend(coordinates +
                                                              pixel_size * float2(-2.0f, -2.0f));
        float4 lower_right_far = input.texture_bilinear_extend(coordinates +
                                                               pixel_size * float2(2.0f, -2.0f));

        if (!use_karis_average) {
          /* The original weights equation mentioned in slide 153 is:
           *   0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
           * The 0.5 corresponds to the center group of pixels and the 0.125 corresponds to the
           * other groups of pixels. The center is sampled 4 times, the far non corner pixels are
           * sampled 2 times, the near corner pixels are sampled only once; but their weight is
           * quadruple the weights of other groups; so they count as sampled 4 times, finally the
           * far corner pixels are sampled only once, essentially totaling 32 samples. So the
           * weights are as used in the following code section. */
          float4 result = (4.0f / 32.0f) * center +
                          (4.0f / 32.0f) * (upper_left_near + upper_right_near + lower_left_near +
                                            lower_right_near) +
                          (2.0f / 32.0f) * (left_far + right_far + upper_far + lower_far) +
                          (1.0f / 32.0f) * (upper_left_far + upper_right_far + lower_left_far +
                                            lower_right_far);
          copy_v4_v4(output.get_elem(texel.x, texel.y), result);
        }
        else {
          /* Reduce the contributions of fireflies on the result by reducing each group of pixels
           * using a Karis brightness weighted sum. This is described in slide 168 titled
           * "Fireflies - Partial Karis Average".
           *
           * This needn't be done on all downsampling passes, but only the first one, since
           * fireflies will not survive the first pass, later passes can use the weighted average.
           */
          float4 center_weighted_sum = karis_brightness_weighted_sum(
              upper_left_near, upper_right_near, lower_right_near, lower_left_near);
          float4 upper_left_weighted_sum = karis_brightness_weighted_sum(
              upper_left_far, upper_far, center, left_far);
          float4 upper_right_weighted_sum = karis_brightness_weighted_sum(
              upper_far, upper_right_far, right_far, center);
          float4 lower_right_weighted_sum = karis_brightness_weighted_sum(
              center, right_far, lower_right_far, lower_far);
          float4 lower_left_weighted_sum = karis_brightness_weighted_sum(
              left_far, center, lower_far, lower_left_far);

          /* The original weights equation mentioned in slide 153 is:
           *   0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
           * Multiply both sides by 8 and you get:
           *   4 + 1 + 1 + 1 + 1 = 8
           * So the weights are as used in the following code section. */
          float4 result = (4.0f / 8.0f) * center_weighted_sum +
                          (1.0f / 8.0f) * (upper_left_weighted_sum + upper_right_weighted_sum +
                                           lower_left_weighted_sum + lower_right_weighted_sum);
          copy_v4_v4(output.get_elem(texel.x, texel.y), result);
        }
      }
    }
  });
}
|
|
|
|
/* Progressively down-sample the given buffer into a buffer with half the size for the given
 * chain length, returning an array containing the chain of down-sampled buffers. The first
 * buffer of the chain is the given buffer itself for easier handling. The chain length is
 * expected not to exceed the binary logarithm of the smaller dimension of the given buffer,
 * because that would result in down-sampling passes that produce useless textures with just
 * one pixel. The caller is also expected to pass a chain_length of at least 2, otherwise no
 * down-sampling pass would run at all. */
static Array<std::unique_ptr<MemoryBuffer>> compute_bloom_downsample_chain(
    MemoryBuffer &highlights, int chain_length)
{
  Array<std::unique_ptr<MemoryBuffer>> downsample_chain(chain_length);

  /* We append the original highlights buffer to the first buffer of the chain to make the code
   * easier. In turn, the number of passes is one less than the chain length, because the first
   * buffer needn't be computed. */
  downsample_chain[0] = std::make_unique<MemoryBuffer>(highlights);
  const IndexRange downsample_passes_range(chain_length - 1);

  for (const int i : downsample_passes_range) {
    const MemoryBuffer &input = *downsample_chain[i];

    /* Each pass halves the size of the previous buffer in the chain. */
    const int2 input_size = int2(input.get_width(), input.get_height());
    const int2 output_size = input_size / 2;

    rcti output_rect;
    BLI_rcti_init(&output_rect, 0, output_size.x, 0, output_size.y);
    downsample_chain[i + 1] = std::make_unique<MemoryBuffer>(DataType::Color, output_rect, false);
    MemoryBuffer &output = *downsample_chain[i + 1];

    /* For the first down-sample pass, we use a special "Karis" down-sample pass that applies a
     * form of local tone mapping to reduce the contributions of fireflies, see the shader for
     * more information. Later passes use a simple average down-sampling filter because fireflies
     * don't survive the first pass. */
    const bool use_karis_average = i == downsample_passes_range.first();
    downsample(input, output, use_karis_average);
  }

  return downsample_chain;
}
|
|
|
|
/* The size of the bloom relative to its maximum possible size, see the
|
|
* compute_bloom_size_halving_count() method for more information. */
|
|
static int get_bloom_size(const NodeGlare *settings)
|
|
{
|
|
return settings->size;
|
|
}
|
|
|
|
/* The bloom has a maximum possible size when the bloom size is equal to MAX_GLARE_SIZE and
|
|
* halves for every unit decrement of the bloom size. This method computes the number of halving
|
|
* that should take place, which is simply the difference to MAX_GLARE_SIZE. */
|
|
static int compute_bloom_size_halving_count(const NodeGlare *settings)
|
|
{
|
|
return MAX_GLARE_SIZE - get_bloom_size(settings);
|
|
}
|
|
|
|
/* Bloom is computed by first progressively half-down-sampling the highlights down to a certain
|
|
* size, then progressively double-up-sampling the last down-sampled buffer up to the original size
|
|
* of the highlights, adding the down-sampled buffer of the same size in each up-sampling step.
|
|
* This can be illustrated as follows:
|
|
*
|
|
* Highlights ---+---> Bloom
|
|
* | |
|
|
* Down-sampled ---+---> Up-sampled
|
|
* | |
|
|
* Down-sampled ---+---> Up-sampled
|
|
* | |
|
|
* Down-sampled ---+---> Up-sampled
|
|
* | ^
|
|
* ... |
|
|
* Down-sampled ------------'
|
|
*
|
|
* The smooth down-sampling followed by smooth up-sampling can be thought of as a cheap way to
|
|
* approximate a large radius blur, and adding the corresponding down-sampled buffer while
|
|
* up-sampling is done to counter the attenuation that happens during down-sampling.
|
|
*
|
|
* Smaller down-sampled buffers contribute to larger glare size, so controlling the size can be
|
|
* done by stopping down-sampling down to a certain size, where the maximum possible size is
|
|
* achieved when down-sampling happens down to the smallest size of 2. */
|
|
void GlareBloomOperation::generate_glare(float *output,
|
|
MemoryBuffer *highlights,
|
|
const NodeGlare *settings)
|
|
{
|
|
/* The maximum possible glare size is achieved when we down-sampled down to the smallest size
|
|
* of 2, which would buffer in a down-sampling chain length of the binary logarithm of the
|
|
* smaller dimension of the size of the highlights.
|
|
*
|
|
* However, as users might want a smaller glare size, we reduce the chain length by the halving
|
|
* count supplied by the user. */
|
|
const int2 size = int2(highlights->get_width(), highlights->get_height());
|
|
const int smaller_glare_dimension = math::min(size.x, size.y);
|
|
const int chain_length = int(std::log2(smaller_glare_dimension)) -
|
|
compute_bloom_size_halving_count(settings);
|
|
|
|
/* If the chain length is less than 2, that means no down-sampling will happen, so we just copy
|
|
* the highlights to the output. This is a sanitization of a corner case, so no need to worry
|
|
* about optimizing the copy away. */
|
|
if (chain_length < 2) {
|
|
memcpy(output,
|
|
highlights->get_buffer(),
|
|
size.x * size.y * COM_DATA_TYPE_COLOR_CHANNELS * sizeof(float));
|
|
return;
|
|
}
|
|
|
|
Array<std::unique_ptr<MemoryBuffer>> downsample_chain = compute_bloom_downsample_chain(
|
|
*highlights, chain_length);
|
|
|
|
/* Notice that for a chain length of n, we need (n - 1) up-sampling passes. */
|
|
const IndexRange upsample_passes_range(chain_length - 1);
|
|
|
|
for (const int i : upsample_passes_range) {
|
|
const MemoryBuffer &input = *downsample_chain[upsample_passes_range.last() - i + 1];
|
|
MemoryBuffer &output = *downsample_chain[upsample_passes_range.last() - i];
|
|
upsample(input, output);
|
|
}
|
|
|
|
memcpy(output,
|
|
downsample_chain[0]->get_buffer(),
|
|
size.x * size.y * COM_DATA_TYPE_COLOR_CHANNELS * sizeof(float));
|
|
}
|
|
|
|
} // namespace blender::compositor
|