317 lines
17 KiB
C++
317 lines
17 KiB
C++
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
#include <cmath>
|
|
#include <cstring>
|
|
#include <memory>
|
|
|
|
#include "BLI_array.hh"
|
|
#include "BLI_index_range.hh"
|
|
#include "BLI_math_base.hh"
|
|
#include "BLI_math_vector.h"
|
|
#include "BLI_math_vector.hh"
|
|
#include "BLI_task.hh"
|
|
|
|
#include "COM_GlareBloomOperation.h"
|
|
|
|
#define MAX_GLARE_SIZE 9
|
|
|
|
namespace blender::compositor {
|
|
|
|
static void upsample(const MemoryBuffer &input, MemoryBuffer &output)
|
|
{
|
|
const int2 output_size = int2(output.get_width(), output.get_height());
|
|
|
|
/* All the offsets in the following code section are in the normalized pixel space of the output
|
|
* image, so compute its normalized pixel size. */
|
|
float2 pixel_size = 1.0f / float2(output_size);
|
|
|
|
threading::parallel_for(IndexRange(output_size.y), 1, [&](const IndexRange sub_y_range) {
|
|
for (const int64_t y : sub_y_range) {
|
|
for (const int64_t x : IndexRange(output_size.x)) {
|
|
/* Each invocation corresponds to one output pixel, where the output has twice the size of
|
|
* the input. */
|
|
int2 texel = int2(x, y);
|
|
|
|
/* Add 0.5 to evaluate the buffer at the center of the pixel and divide by the image size
|
|
* to get the coordinates into the buffer's expected [0, 1] range. */
|
|
float2 coordinates = (float2(texel) + float2(0.5)) / float2(output_size);
|
|
|
|
/* Upsample by applying a 3x3 tent filter on the bi-linearly interpolated values evaluated
|
|
* at the center of neighboring output pixels. As more tent filter upsampling passes are
|
|
* applied, the result approximates a large sized Gaussian filter. This upsampling strategy
|
|
* is described in the talk:
|
|
*
|
|
* Next Generation Post Processing in Call of Duty: Advanced Warfare
|
|
* https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
|
|
*
|
|
* In particular, the upsampling strategy is described and illustrated in slide 162 titled
|
|
* "Upsampling - Our Solution". */
|
|
float4 upsampled = float4(0.0f);
|
|
upsampled += (4.0f / 16.0f) * input.texture_bilinear_extend(coordinates);
|
|
upsampled += (2.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(-1.0f, 0.0f));
|
|
upsampled += (2.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(0.0f, 1.0f));
|
|
upsampled += (2.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(1.0f, 0.0f));
|
|
upsampled += (2.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(0.0f, -1.0f));
|
|
upsampled += (1.0f / 16.0f) * input.texture_bilinear_extend(
|
|
coordinates + pixel_size * float2(-1.0f, -1.0f));
|
|
upsampled += (1.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(-1.0f, 1.0f));
|
|
upsampled += (1.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(1.0f, -1.0f));
|
|
upsampled += (1.0f / 16.0f) *
|
|
input.texture_bilinear_extend(coordinates + pixel_size * float2(1.0f, 1.0f));
|
|
|
|
const float4 original_value = output.get_elem(texel.x, texel.y);
|
|
copy_v4_v4(output.get_elem(texel.x, texel.y), original_value + upsampled);
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
/* Computes the weighted average of the given four colors, which are assumed to the colors of
|
|
* spatially neighboring pixels. The weights are computed so as to reduce the contributions of
|
|
* fireflies on the result by applying a form of local tone mapping as described by Brian Karis in
|
|
* the article "Graphic Rants: Tone Mapping".
|
|
*
|
|
* https://graphicrants.blogspot.com/2013/12/tone-mapping.html */
|
|
static float4 karis_brightness_weighted_sum(float4 color1,
|
|
float4 color2,
|
|
float4 color3,
|
|
float4 color4)
|
|
{
|
|
const float4 brightness = float4(math::reduce_max(color1.xyz()),
|
|
math::reduce_max(color2.xyz()),
|
|
math::reduce_max(color3.xyz()),
|
|
math::reduce_max(color4.xyz()));
|
|
const float4 weights = 1.0f / (brightness + 1.0);
|
|
const float weights_sum = math::reduce_add(weights);
|
|
const float4 sum = color1 * weights[0] + color2 * weights[1] + color3 * weights[2] +
|
|
color4 * weights[3];
|
|
return math::safe_divide(sum, weights_sum);
|
|
}
|
|
|
|
static void downsample(const MemoryBuffer &input, MemoryBuffer &output, bool use_karis_average)
|
|
{
|
|
const int2 input_size = int2(input.get_width(), input.get_height());
|
|
const int2 output_size = int2(output.get_width(), output.get_height());
|
|
|
|
/* All the offsets in the following code section are in the normalized pixel space of the
|
|
* input.texture_bilinear_extend, so compute its normalized pixel size. */
|
|
float2 pixel_size = 1.0f / float2(input_size);
|
|
|
|
threading::parallel_for(IndexRange(output_size.y), 1, [&](const IndexRange sub_y_range) {
|
|
for (const int64_t y : sub_y_range) {
|
|
for (const int64_t x : IndexRange(output_size.x)) {
|
|
/* Each invocation corresponds to one output pixel, where the output has half the size of
|
|
* the input. */
|
|
int2 texel = int2(x, y);
|
|
|
|
/* Add 0.5 to evaluate the buffer at the center of the pixel and divide by the image size
|
|
* to get the coordinates into the buffer's expected [0, 1] range. */
|
|
float2 coordinates = (float2(texel) + float2(0.5f)) / float2(output_size);
|
|
|
|
/* Each invocation downsamples a 6x6 area of pixels around the center of the corresponding
|
|
* output pixel, but instead of sampling each of the 36 pixels in the area, we only sample
|
|
* 13 positions using bilinear fetches at the center of a number of overlapping square
|
|
* 4-pixel groups. This downsampling strategy is described in the talk:
|
|
*
|
|
* Next Generation Post Processing in Call of Duty: Advanced Warfare
|
|
* https://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
|
|
*
|
|
* In particular, the downsampling strategy is described and illustrated in slide 153
|
|
* titled "Downsampling - Our Solution". This is employed as it significantly improves the
|
|
* stability of the glare as can be seen in the videos in the talk. */
|
|
float4 center = input.texture_bilinear_extend(coordinates);
|
|
float4 upper_left_near = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(-1.0f, 1.0f));
|
|
float4 upper_right_near = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(1.0f, 1.0f));
|
|
float4 lower_left_near = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(-1.0f, -1.0f));
|
|
float4 lower_right_near = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(1.0f, -1.0f));
|
|
float4 left_far = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(-2.0f, 0.0f));
|
|
float4 right_far = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(2.0f, 0.0f));
|
|
float4 upper_far = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(0.0f, 2.0f));
|
|
float4 lower_far = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(0.0f, -2.0f));
|
|
float4 upper_left_far = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(-2.0f, 2.0f));
|
|
float4 upper_right_far = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(2.0f, 2.0f));
|
|
float4 lower_left_far = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(-2.0f, -2.0f));
|
|
float4 lower_right_far = input.texture_bilinear_extend(coordinates +
|
|
pixel_size * float2(2.0f, -2.0f));
|
|
|
|
if (!use_karis_average) {
|
|
/* The original weights equation mentioned in slide 153 is:
|
|
* 0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
|
|
* The 0.5 corresponds to the center group of pixels and the 0.125 corresponds to the
|
|
* other groups of pixels. The center is sampled 4 times, the far non corner pixels are
|
|
* sampled 2 times, the near corner pixels are sampled only once; but their weight is
|
|
* quadruple the weights of other groups; so they count as sampled 4 times, finally the
|
|
* far corner pixels are sampled only once, essentially totaling 32 samples. So the
|
|
* weights are as used in the following code section. */
|
|
float4 result = (4.0f / 32.0f) * center +
|
|
(4.0f / 32.0f) * (upper_left_near + upper_right_near + lower_left_near +
|
|
lower_right_near) +
|
|
(2.0f / 32.0f) * (left_far + right_far + upper_far + lower_far) +
|
|
(1.0f / 32.0f) * (upper_left_far + upper_right_far + lower_left_far +
|
|
lower_right_far);
|
|
copy_v4_v4(output.get_elem(texel.x, texel.y), result);
|
|
}
|
|
else {
|
|
/* Reduce the contributions of fireflies on the result by reducing each group of pixels
|
|
* using a Karis brightness weighted sum. This is described in slide 168 titled
|
|
* "Fireflies - Partial Karis Average".
|
|
*
|
|
* This needn't be done on all downsampling passes, but only the first one, since
|
|
* fireflies will not survive the first pass, later passes can use the weighted average.
|
|
*/
|
|
float4 center_weighted_sum = karis_brightness_weighted_sum(
|
|
upper_left_near, upper_right_near, lower_right_near, lower_left_near);
|
|
float4 upper_left_weighted_sum = karis_brightness_weighted_sum(
|
|
upper_left_far, upper_far, center, left_far);
|
|
float4 upper_right_weighted_sum = karis_brightness_weighted_sum(
|
|
upper_far, upper_right_far, right_far, center);
|
|
float4 lower_right_weighted_sum = karis_brightness_weighted_sum(
|
|
center, right_far, lower_right_far, lower_far);
|
|
float4 lower_left_weighted_sum = karis_brightness_weighted_sum(
|
|
left_far, center, lower_far, lower_left_far);
|
|
|
|
/* The original weights equation mentioned in slide 153 is:
|
|
* 0.5 + 0.125 + 0.125 + 0.125 + 0.125 = 1
|
|
* Multiply both sides by 8 and you get:
|
|
* 4 + 1 + 1 + 1 + 1 = 8
|
|
* So the weights are as used in the following code section. */
|
|
float4 result = (4.0f / 8.0f) * center_weighted_sum +
|
|
(1.0f / 8.0f) * (upper_left_weighted_sum + upper_right_weighted_sum +
|
|
lower_left_weighted_sum + lower_right_weighted_sum);
|
|
copy_v4_v4(output.get_elem(texel.x, texel.y), result);
|
|
}
|
|
}
|
|
}
|
|
});
|
|
}
|
|
|
|
/* Progressively down-sample the given buffer into a buffer with half the size for the given
|
|
* chain length, returning an array containing the chain of down-sampled buffers. The first
|
|
* buffer of the chain is the given buffer itself for easier handling. The chain length is
|
|
* expected not to exceed the binary logarithm of the smaller dimension of the given buffer,
|
|
* because that would buffer in down-sampling passes that produce useless textures with just
|
|
* one pixel. */
|
|
static Array<std::unique_ptr<MemoryBuffer>> compute_bloom_downsample_chain(
|
|
MemoryBuffer &highlights, int chain_length)
|
|
{
|
|
Array<std::unique_ptr<MemoryBuffer>> downsample_chain(chain_length);
|
|
|
|
/* We append the original highlights buffer to the first buffer of the chain to make the code
|
|
* easier. In turn, the number of passes is one less than the chain length, because the first
|
|
* buffer needn't be computed. */
|
|
downsample_chain[0] = std::make_unique<MemoryBuffer>(highlights);
|
|
const IndexRange downsample_passes_range(chain_length - 1);
|
|
|
|
for (const int i : downsample_passes_range) {
|
|
const MemoryBuffer &input = *downsample_chain[i];
|
|
|
|
const int2 input_size = int2(input.get_width(), input.get_height());
|
|
const int2 output_size = input_size / 2;
|
|
|
|
rcti output_rect;
|
|
BLI_rcti_init(&output_rect, 0, output_size.x, 0, output_size.y);
|
|
downsample_chain[i + 1] = std::make_unique<MemoryBuffer>(DataType::Color, output_rect, false);
|
|
MemoryBuffer &output = *downsample_chain[i + 1];
|
|
|
|
/* For the first down-sample pass, we use a special "Karis" down-sample pass that applies a
|
|
* form of local tone mapping to reduce the contributions of fireflies, see the shader for
|
|
* more information. Later passes use a simple average down-sampling filter because fireflies
|
|
* doesn't service the first pass. */
|
|
const bool use_karis_average = i == downsample_passes_range.first();
|
|
downsample(input, output, use_karis_average);
|
|
}
|
|
|
|
return downsample_chain;
|
|
}
|
|
|
|
/* The size of the bloom relative to its maximum possible size, see the
|
|
* compute_bloom_size_halving_count() method for more information. */
|
|
static int get_bloom_size(const NodeGlare *settings)
|
|
{
|
|
return settings->size;
|
|
}
|
|
|
|
/* The bloom has a maximum possible size when the bloom size is equal to MAX_GLARE_SIZE and
|
|
* halves for every unit decrement of the bloom size. This method computes the number of halving
|
|
* that should take place, which is simply the difference to MAX_GLARE_SIZE. */
|
|
static int compute_bloom_size_halving_count(const NodeGlare *settings)
|
|
{
|
|
return MAX_GLARE_SIZE - get_bloom_size(settings);
|
|
}
|
|
|
|
/* Bloom is computed by first progressively half-down-sampling the highlights down to a certain
|
|
* size, then progressively double-up-sampling the last down-sampled buffer up to the original size
|
|
* of the highlights, adding the down-sampled buffer of the same size in each up-sampling step.
|
|
* This can be illustrated as follows:
|
|
*
|
|
* Highlights ---+---> Bloom
|
|
* | |
|
|
* Down-sampled ---+---> Up-sampled
|
|
* | |
|
|
* Down-sampled ---+---> Up-sampled
|
|
* | |
|
|
* Down-sampled ---+---> Up-sampled
|
|
* | ^
|
|
* ... |
|
|
* Down-sampled ------------'
|
|
*
|
|
* The smooth down-sampling followed by smooth up-sampling can be thought of as a cheap way to
|
|
* approximate a large radius blur, and adding the corresponding down-sampled buffer while
|
|
* up-sampling is done to counter the attenuation that happens during down-sampling.
|
|
*
|
|
* Smaller down-sampled buffers contribute to larger glare size, so controlling the size can be
|
|
* done by stopping down-sampling down to a certain size, where the maximum possible size is
|
|
* achieved when down-sampling happens down to the smallest size of 2. */
|
|
void GlareBloomOperation::generate_glare(float *output,
|
|
MemoryBuffer *highlights,
|
|
const NodeGlare *settings)
|
|
{
|
|
/* The maximum possible glare size is achieved when we down-sampled down to the smallest size
|
|
* of 2, which would buffer in a down-sampling chain length of the binary logarithm of the
|
|
* smaller dimension of the size of the highlights.
|
|
*
|
|
* However, as users might want a smaller glare size, we reduce the chain length by the halving
|
|
* count supplied by the user. */
|
|
const int2 size = int2(highlights->get_width(), highlights->get_height());
|
|
const int smaller_glare_dimension = math::min(size.x, size.y);
|
|
const int chain_length = int(std::log2(smaller_glare_dimension)) -
|
|
compute_bloom_size_halving_count(settings);
|
|
|
|
Array<std::unique_ptr<MemoryBuffer>> downsample_chain = compute_bloom_downsample_chain(
|
|
*highlights, chain_length);
|
|
|
|
/* Notice that for a chain length of n, we need (n - 1) up-sampling passes. */
|
|
const IndexRange upsample_passes_range(chain_length - 1);
|
|
|
|
for (const int i : upsample_passes_range) {
|
|
const MemoryBuffer &input = *downsample_chain[upsample_passes_range.last() - i + 1];
|
|
MemoryBuffer &output = *downsample_chain[upsample_passes_range.last() - i];
|
|
upsample(input, output);
|
|
}
|
|
|
|
memcpy(output,
|
|
downsample_chain[0]->get_buffer(),
|
|
size.x * size.y * COM_DATA_TYPE_COLOR_CHANNELS * sizeof(float));
|
|
}
|
|
|
|
} // namespace blender::compositor
|