503 lines
19 KiB
C++
503 lines
19 KiB
C++
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
#include <cmath>
|
|
#include <cstring>
|
|
#include <memory>
|
|
|
|
#include "BLI_array.hh"
|
|
#include "BLI_index_range.hh"
|
|
#include "BLI_math_base.hh"
|
|
#include "BLI_math_vector.h"
|
|
#include "BLI_math_vector.hh"
|
|
#include "BLI_task.hh"
|
|
|
|
#include "COM_VectorBlurOperation.h"
|
|
|
|
/* This is identical to the compositor implementation in compositor_motion_blur_info.hh and its
|
|
* related files with the necessary adjustments to make it work for the CPU. */
|
|
|
|
/* Side length, in pixels, of the square tiles over which the maximum velocity is reduced. */
#define MOTION_BLUR_TILE_SIZE 32
/* Scale applied to the depth difference of two samples when classifying them as foreground or
 * background in depth_compare(). */
#define DEPTH_SCALE 100.0f
|
|
|
|
namespace blender::compositor {
|
|
|
|
/* Declares the sockets of the operation: a color, a value and a color input, in that order,
 * followed by a single color output. The inputs are later read in update_memory_buffer() through
 * IMAGE_INPUT_INDEX, DEPTH_INPUT_INDEX and VELOCITY_INPUT_INDEX, which are declared outside of
 * this file. */
VectorBlurOperation::VectorBlurOperation()
{
  /* The settings are not known at construction time. update_memory_buffer() dereferences this
   * pointer, so it has to be assigned before the operation is executed. */
  settings_ = nullptr;

  add_input_socket(DataType::Color);
  add_input_socket(DataType::Value);
  add_input_socket(DataType::Color);
  add_output_socket(DataType::Color);
}
|
|
|
|
/* Returns the input velocity that has the larger magnitude. */
|
|
static float2 max_velocity(const float2 &a, const float2 &b)
|
|
{
|
|
return math::length_squared(a) > math::length_squared(b) ? a : b;
|
|
}
|
|
|
|
/* Identical to motion_blur_tile_indirection_pack_payload, encodes the value and its texel such
|
|
* that the integer length of the value is encoded in the most significant bits, then the x value
|
|
* of the texel are encoded in the middle bits, then the y value of the texel is stored in the
|
|
* least significant bits. */
|
|
static uint32_t velocity_atomic_max_value(const float2 &value, const int2 &texel)
|
|
{
|
|
const uint32_t length_bits = math::min(uint32_t(math::ceil(math::length(value))), 0x3FFFu);
|
|
return (length_bits << 18u) | ((texel.x & 0x1FFu) << 9u) | (texel.y & 0x1FFu);
|
|
}
|
|
|
|
/* Returns the input velocity that has the larger integer magnitude, and if equal the larger x
|
|
* texel coordinates, and if equal, the larger y texel coordinates. It might be weird that we use
|
|
* an approximate comparison, but this is used for compatibility with the GPU code, which uses
|
|
* atomic integer operations, hence the limited precision. See velocity_atomic_max_value for more
|
|
* information. */
|
|
static float2 max_velocity_approximate(const float2 &a,
|
|
const float2 &b,
|
|
const int2 &a_texel,
|
|
const int2 &b_texel)
|
|
{
|
|
return velocity_atomic_max_value(a, a_texel) > velocity_atomic_max_value(b, b_texel) ? a : b;
|
|
}
|
|
|
|
/* Reduces each 32x32 block of velocity pixels into a single velocity whose magnitude is largest.
|
|
* Each of the previous and next velocities are reduces independently. */
|
|
static MemoryBuffer compute_max_tile_velocity(MemoryBuffer *velocity_buffer)
|
|
{
|
|
const int2 tile_size = int2(MOTION_BLUR_TILE_SIZE);
|
|
const int2 velocity_size = int2(velocity_buffer->get_width(), velocity_buffer->get_height());
|
|
const int2 tiles_count = math::divide_ceil(velocity_size, tile_size);
|
|
MemoryBuffer output(DataType::Color, tiles_count.x, tiles_count.y);
|
|
|
|
threading::parallel_for(IndexRange(tiles_count.y), 1, [&](const IndexRange sub_y_range) {
|
|
for (const int64_t y : sub_y_range) {
|
|
for (const int64_t x : IndexRange(tiles_count.x)) {
|
|
const int2 texel = int2(x, y);
|
|
|
|
float2 max_previous_velocity = float2(0.0f);
|
|
float2 max_next_velocity = float2(0.0f);
|
|
|
|
for (int j = 0; j < tile_size.y; j++) {
|
|
for (int i = 0; i < tile_size.x; i++) {
|
|
int2 sub_texel = texel * tile_size + int2(i, j);
|
|
const float4 velocity = velocity_buffer->get_elem_clamped(sub_texel.x, sub_texel.y);
|
|
max_previous_velocity = max_velocity(velocity.xy(), max_previous_velocity);
|
|
max_next_velocity = max_velocity(velocity.zw(), max_next_velocity);
|
|
}
|
|
}
|
|
|
|
const float4 max_velocity = float4(max_previous_velocity, max_next_velocity);
|
|
copy_v4_v4(output.get_elem(texel.x, texel.y), max_velocity);
|
|
}
|
|
}
|
|
});
|
|
|
|
return output;
|
|
}
|
|
|
|
struct MotionRect {
|
|
int2 bottom_left;
|
|
int2 extent;
|
|
};
|
|
|
|
/* Computes the rectangular area of tiles spanned by the given motion when it starts at the given
 * tile. The motion is given in pixels and is converted to a number of tiles, rounded up. The
 * rectangle is clamped such that it stays inside of a grid of the given size, in tiles. */
static MotionRect compute_motion_rect(int2 tile, float2 motion, int2 size)
{
  /* `ceil()` to get the number of tiles touched along each axis, the sign of the motion then
   * gives the direction in which they are touched. */
  const float2 tiles_touched = math::ceil(math::abs(motion) / float(MOTION_BLUR_TILE_SIZE));
  const int2 end_tile = tile + int2(math::sign(motion) * tiles_touched);

  /* Bounds of the start and end tiles, clamped to the bounds of the tile grid. */
  const int2 upper = math::min(math::max(end_tile, tile), size - 1);
  const int2 lower = math::max(math::min(end_tile, tile), int2(0));

  MotionRect rect;
  rect.bottom_left = lower;
  rect.extent = 1 + upper - lower;
  return rect;
}
|
|
|
|
struct MotionLine {
|
|
/** Origin of the line. */
|
|
float2 origin;
|
|
/** Normal to the line direction. */
|
|
float2 normal;
|
|
};
|
|
|
|
/* Computes the line that passes through the given tile in the direction of the given motion. If
 * the motion is zero, the normal of the returned line is zero as well. */
static MotionLine compute_motion_line(int2 tile, float2 motion)
{
  /* Normalize the motion, unless it is zero, in which case it is used as is. */
  const float length = math::length(motion);
  float2 direction = motion;
  if (length != 0.0f) {
    direction = motion / length;
  }

  MotionLine line;
  line.origin = float2(tile);
  /* Rotate 90 degrees counter-clockwise. */
  line.normal = float2(-direction.y, direction.x);
  return line;
}
|
|
|
|
/* Returns true if the given tile is close enough to the given motion line to be considered
 * covered by it. */
static bool is_inside_motion_line(int2 tile, MotionLine motion_line)
{
  /* NOTE: Everything is in tile units. */
  const float2 tile_to_origin = motion_line.origin - float2(tile);
  const float signed_distance = math::dot(motion_line.normal, tile_to_origin);
  /* In order to be conservative and for simplicity, we use the bounding circles of the tiles.
   * Consider that both the tile and the line have a bounding radius of M_SQRT1_2, so the
   * threshold is the sum of both radii. */
  return math::abs(signed_distance) < math::numbers::sqrt2_v<float>;
}
|
|
|
|
/* The max tile velocity image computes the maximum within 32x32 blocks, while the velocity can
|
|
* in fact extend beyond such a small block. So we dilate the max blocks by taking the maximum
|
|
* along the path of each of the max velocity tiles. Since the shader uses custom max atomics,
|
|
* the output will be an indirection buffer that points to a particular tile in the original max
|
|
* tile velocity image. This is done as a form of performance optimization, see the shader for
|
|
* more information. */
|
|
static MemoryBuffer dilate_max_velocity(MemoryBuffer &max_tile_velocity, float shutter_speed)
|
|
{
|
|
const int2 size = int2(max_tile_velocity.get_width(), max_tile_velocity.get_height());
|
|
MemoryBuffer output(DataType::Color, size.x, size.y);
|
|
const float4 zero_value = float4(0.0f);
|
|
output.fill(output.get_rect(), zero_value);
|
|
|
|
for (const int64_t y : IndexRange(size.y)) {
|
|
for (const int64_t x : IndexRange(size.x)) {
|
|
const int2 src_tile = int2(x, y);
|
|
|
|
float4 max_motion = float4(max_tile_velocity.get_elem(x, y)) *
|
|
float4(float2(shutter_speed), float2(-shutter_speed));
|
|
|
|
{
|
|
/* Rectangular area (in tiles) where the motion vector spreads. */
|
|
MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.xy(), size);
|
|
MotionLine motion_line = compute_motion_line(src_tile, max_motion.xy());
|
|
/* Do a conservative rasterization of the line of the motion vector line. */
|
|
for (int j = 0; j < motion_rect.extent.y; j++) {
|
|
for (int i = 0; i < motion_rect.extent.x; i++) {
|
|
int2 tile = motion_rect.bottom_left + int2(i, j);
|
|
if (is_inside_motion_line(tile, motion_line)) {
|
|
float *pixel = output.get_elem(tile.x, tile.y);
|
|
copy_v2_v2(pixel + 2,
|
|
max_velocity_approximate(pixel + 2, max_motion.zw(), tile, src_tile));
|
|
copy_v2_v2(pixel, max_velocity_approximate(pixel, max_motion.xy(), tile, src_tile));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
{
|
|
/* Rectangular area (in tiles) where the motion vector spreads. */
|
|
MotionRect motion_rect = compute_motion_rect(src_tile, max_motion.zw(), size);
|
|
MotionLine motion_line = compute_motion_line(src_tile, max_motion.zw());
|
|
/* Do a conservative rasterization of the line of the motion vector line. */
|
|
for (int j = 0; j < motion_rect.extent.y; j++) {
|
|
for (int i = 0; i < motion_rect.extent.x; i++) {
|
|
int2 tile = motion_rect.bottom_left + int2(i, j);
|
|
if (is_inside_motion_line(tile, motion_line)) {
|
|
float *pixel = output.get_elem(tile.x, tile.y);
|
|
copy_v2_v2(pixel, max_velocity_approximate(pixel, max_motion.xy(), tile, src_tile));
|
|
copy_v2_v2(pixel + 2,
|
|
max_velocity_approximate(pixel + 2, max_motion.zw(), tile, src_tile));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return output;
|
|
}
|
|
|
|
/* Interleaved gradient noise by Jorge Jimenez
|
|
* http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare. */
|
|
static float interleaved_gradient_noise(int2 p)
|
|
{
|
|
return math::fract(52.9829189f * math::fract(0.06711056f * p.x + 0.00583715f * p.y));
|
|
}
|
|
|
|
/* Compares the motion lengths of the center pixel (x) and of the sample (y) with the distance
 * between them. Each component is 1 if the respective motion is at least as long as the offset
 * and falls off linearly to 0 as the motion gets one unit shorter than the offset. */
static float2 spread_compare(float center_motion_length,
                             float sample_motion_length,
                             float offset_length)
{
  const float2 motion_lengths = float2(center_motion_length, sample_motion_length);
  return math::clamp(motion_lengths - offset_length + 1.0f, 0.0f, 1.0f);
}
|
|
|
|
/* Compares the depth of the sample with the depth of the center pixel. The x component grows
 * from 0 to 1 as the sample depth exceeds the center depth, while the y component grows from 0
 * to 1 as the sample depth falls below the center depth. Both are 0.5 for equal depths. The
 * steepness of the transition is controlled by DEPTH_SCALE. */
static float2 depth_compare(float center_depth, float sample_depth)
{
  const float depth_delta = sample_depth - center_depth;
  const float2 signed_scale = float2(DEPTH_SCALE, -DEPTH_SCALE);
  return math::clamp(0.5f + signed_scale * depth_delta, 0.0f, 1.0f);
}
|
|
|
|
/* Kill contribution if not going the same direction. */
|
|
static float dir_compare(float2 offset, float2 sample_motion, float sample_motion_length)
|
|
{
|
|
if (sample_motion_length < 0.5f) {
|
|
return 1.0f;
|
|
}
|
|
return (math::dot(offset, sample_motion) > 0.0f) ? 1.0f : 0.0f;
|
|
}
|
|
|
|
/* Return background (x) and foreground (y) weights. */
|
|
static float2 sample_weights(float center_depth,
|
|
float sample_depth,
|
|
float center_motion_length,
|
|
float sample_motion_length,
|
|
float offset_length)
|
|
{
|
|
/* Classify foreground/background. */
|
|
float2 depth_weight = depth_compare(center_depth, sample_depth);
|
|
/* Weight if sample is overlapping or under the center pixel. */
|
|
float2 spread_weight = spread_compare(center_motion_length, sample_motion_length, offset_length);
|
|
return depth_weight * spread_weight;
|
|
}
|
|
|
|
struct Accumulator {
|
|
float4 fg;
|
|
float4 bg;
|
|
/** x: Background, y: Foreground, z: dir. */
|
|
float3 weight;
|
|
};
|
|
|
|
/* Gathers a single sample into the given accumulator. The sample is taken at the location of
 * the center pixel, given by screen_uv, minus the given offset, which is given in pixels and is
 * converted to normalized coordinates using the given size. The image, depth and velocity are
 * all sampled at that location using texture_bilinear_extend().
 *
 * If next is true, the next velocity (zw) of the sample is used, otherwise, the previous
 * velocity (xy) is used. The sampled velocities are multiplied by the shutter speed and the next
 * velocity is additionally negated. */
static void gather_sample(MemoryBuffer *image_buffer,
                          MemoryBuffer *depth_buffer,
                          MemoryBuffer *velocity_buffer,
                          int2 size,
                          float2 screen_uv,
                          float center_depth,
                          float center_motion_len,
                          float2 offset,
                          float offset_len,
                          const bool next,
                          float shutter_speed,
                          Accumulator &accum)
{
  const float4 motion_scale = float4(float2(shutter_speed), float2(-shutter_speed));
  const float2 sample_uv = screen_uv - offset / float2(size);

  /* Data of the sample. */
  float4 sample_vectors = velocity_buffer->texture_bilinear_extend(sample_uv) * motion_scale;
  const float2 sample_motion = next ? sample_vectors.zw() : sample_vectors.xy();
  const float sample_motion_len = math::length(sample_motion);
  const float sample_depth = depth_buffer->texture_bilinear_extend(sample_uv).x;
  const float4 sample_color = image_buffer->texture_bilinear_extend(sample_uv);

  const float2 direct_weights = sample_weights(
      center_depth, sample_depth, center_motion_len, sample_motion_len, offset_len);

  /* The direction weight is accumulated as is and also modulates the other two weights. */
  const float dir_weight = dir_compare(offset, sample_motion, sample_motion_len);
  const float3 weights = float3(
      direct_weights.x * dir_weight, direct_weights.y * dir_weight, dir_weight);

  accum.fg += sample_color * weights.y;
  accum.bg += sample_color * weights.x;
  accum.weight += weights;
}
|
|
|
|
/* Gathers samples_count samples along the max motion of the tile of the pixel, then another
 * samples_count samples along the motion of the center pixel itself. Nothing is gathered if the
 * max motion is shorter than half a unit, and the second set of samples is skipped if the
 * center motion is shorter than half a unit.
 *
 * The samples are evenly spaced along the motion, with the first sample placed at the given ofs
 * measured in units of the sample spacing. See gather_sample() for the rest of the
 * parameters. */
static void gather_blur(MemoryBuffer *image_buffer,
                        MemoryBuffer *depth_buffer,
                        MemoryBuffer *velocity_buffer,
                        int2 size,
                        float2 screen_uv,
                        float2 center_motion,
                        float center_depth,
                        float2 max_motion,
                        float ofs,
                        const bool next,
                        int samples_count,
                        float shutter_speed,
                        Accumulator &accum)
{
  const float center_motion_len = math::length(center_motion);
  float max_motion_len = math::length(max_motion);

  /* Tile boundaries randomization can fetch a tile where there is less motion than this pixel.
   * Fix this by overriding the max_motion. */
  if (max_motion_len < center_motion_len) {
    max_motion_len = center_motion_len;
    max_motion = center_motion;
  }

  if (max_motion_len < 0.5f) {
    return;
  }

  const float inc = 1.0f / float(samples_count);

  /* Gathers samples_count samples that are evenly spaced along the given motion. */
  auto gather_along = [&](const float2 &motion, const float motion_len) {
    float t = ofs * inc;
    for (int i = 0; i < samples_count; i++, t += inc) {
      gather_sample(image_buffer,
                    depth_buffer,
                    velocity_buffer,
                    size,
                    screen_uv,
                    center_depth,
                    center_motion_len,
                    motion * t,
                    motion_len * t,
                    next,
                    shutter_speed,
                    accum);
    }
  };

  gather_along(max_motion, max_motion_len);

  if (center_motion_len < 0.5f) {
    return;
  }

  /* Also sample in center motion direction. Allow recovering motion where there is conflicting
   * motion between foreground and background. */
  gather_along(center_motion, center_motion_len);
}
|
|
|
|
/* Computes the motion blurred image and writes it to the given output buffer. For every pixel,
 * samples are gathered along the previous and next motions, see gather_blur(), and are then
 * resolved into a single color.
 *
 * - image_buffer, depth_buffer, velocity_buffer: The inputs, read at the same pixel coordinates.
 * - max_velocity_buffer: The result of dilate_max_velocity(), one pixel per tile. Its velocities
 *   are expected to be already multiplied by the shutter speed.
 * - samples_count: Number of samples per gather direction.
 * - shutter_speed: Factor that is multiplied by the velocities, the next velocities (zw) are
 *   additionally negated.
 *
 * Rows of pixels are distributed over threads, each pixel only writes its own output pixel. */
static void motion_blur(MemoryBuffer *image_buffer,
                        MemoryBuffer *depth_buffer,
                        MemoryBuffer *velocity_buffer,
                        MemoryBuffer *max_velocity_buffer,
                        MemoryBuffer *output,
                        int samples_count,
                        float shutter_speed)
{
  const int2 size = int2(image_buffer->get_width(), image_buffer->get_height());
  const int2 tiles_size = int2(max_velocity_buffer->get_width(),
                               max_velocity_buffer->get_height());
  threading::parallel_for(IndexRange(size.y), 1, [&](const IndexRange sub_y_range) {
    for (const int64_t y : sub_y_range) {
      for (const int64_t x : IndexRange(size.x)) {
        const int2 texel = int2(x, y);
        float2 uv = (float2(texel) + 0.5f) / float2(size);

        /* Data of the center pixel of the gather (target). */
        float center_depth = *depth_buffer->get_elem(x, y);
        float4 center_motion = float4(velocity_buffer->get_elem(x, y)) *
                               float4(float2(shutter_speed), float2(-shutter_speed));
        float4 center_color = image_buffer->get_elem(x, y);

        /* Randomize tile boundary to avoid ugly discontinuities. Randomize 1/4th of the tile.
         * Note this randomize only in one direction but in practice it's enough.
         *
         * The noise is remapped from the [0, 1] range to the [-1, 1] range before it is scaled.
         * The parentheses are important, without them, the expression degenerates into an
         * almost constant negative shift due to operator precedence. */
        float rand = interleaved_gradient_noise(texel);
        const float tile_jitter = (rand * 2.0f - 1.0f) * float(MOTION_BLUR_TILE_SIZE) * 0.25f;
        int2 tile = (texel + int2(tile_jitter)) / MOTION_BLUR_TILE_SIZE;
        /* The jitter can push pixels near the borders into tiles that do not exist, so clamp to
         * the bounds of the max velocity buffer. */
        tile = math::max(math::min(tile, tiles_size - 1), int2(0));

        /* No need to multiply by the shutter speed and invert the next velocities since this was
         * already done in dilate_max_velocity. */
        float4 max_motion = max_velocity_buffer->get_elem(tile.x, tile.y);

        Accumulator accum;
        accum.weight = float3(0.0f, 0.0f, 1.0f);
        accum.bg = float4(0.0f);
        accum.fg = float4(0.0f);
        /* First linear gather. time = [T - delta, T] */
        gather_blur(image_buffer,
                    depth_buffer,
                    velocity_buffer,
                    size,
                    uv,
                    center_motion.xy(),
                    center_depth,
                    max_motion.xy(),
                    rand,
                    false,
                    samples_count,
                    shutter_speed,
                    accum);
        /* Second linear gather. time = [T, T + delta] */
        gather_blur(image_buffer,
                    depth_buffer,
                    velocity_buffer,
                    size,
                    uv,
                    center_motion.zw(),
                    center_depth,
                    max_motion.zw(),
                    rand,
                    true,
                    samples_count,
                    shutter_speed,
                    accum);

#if 1 /* Own addition. Not present in reference implementation. */
        /* Avoid division by 0.0. */
        float w = 1.0f / (50.0f * float(samples_count) * 4.0f);
        accum.bg += center_color * w;
        accum.weight.x += w;
        /* NOTE: In Jimenez's presentation, they used center sample.
         * We use background color as it contains more information for foreground
         * elements that have not enough weights.
         * Yield better blur in complex motion. */
        center_color = accum.bg / accum.weight.x;
#endif
        /* Merge background. */
        accum.fg += accum.bg;
        accum.weight.y += accum.weight.x;
        /* Balance accumulation for failed samples.
         * We replace the missing foreground by the background. */
        float blend_fac = math::clamp(1.0f - accum.weight.y / accum.weight.z, 0.0f, 1.0f);
        float4 out_color = (accum.fg / accum.weight.z) + center_color * blend_fac;

        copy_v4_v4(output->get_elem(x, y), out_color);
      }
    }
  });
}
|
|
|
|
/* Executes the operation on the entire output buffer. The max tile velocity is computed from the
 * velocity input and dilated, then the motion blur is evaluated into the output. The shutter
 * speed and the number of samples are read from the `fac` and `samples` members of the
 * settings, so the settings must have been assigned before this is called. */
void VectorBlurOperation::update_memory_buffer(MemoryBuffer *output,
                                               const rcti & /*area*/,
                                               Span<MemoryBuffer *> inputs)
{
  MemoryBuffer *image = inputs[IMAGE_INPUT_INDEX];
  MemoryBuffer *depth = inputs[DEPTH_INPUT_INDEX];
  MemoryBuffer *velocity = inputs[VELOCITY_INPUT_INDEX];

  /* Single element inputs are inflated into temporary buffers before they are processed. The
   * temporary buffers are owned by this function, so hold them in unique pointers such that they
   * are freed on every path out of the function. The pointers stay null for inputs that need no
   * inflation, in which case the input itself is used. */
  std::unique_ptr<MemoryBuffer> inflated_image(image->is_a_single_elem() ? image->inflate() :
                                                                           nullptr);
  std::unique_ptr<MemoryBuffer> inflated_depth(depth->is_a_single_elem() ? depth->inflate() :
                                                                           nullptr);
  std::unique_ptr<MemoryBuffer> inflated_velocity(
      velocity->is_a_single_elem() ? velocity->inflate() : nullptr);

  MemoryBuffer *image_buffer = inflated_image ? inflated_image.get() : image;
  MemoryBuffer *depth_buffer = inflated_depth ? inflated_depth.get() : depth;
  MemoryBuffer *velocity_buffer = inflated_velocity ? inflated_velocity.get() : velocity;

  MemoryBuffer max_tile_velocity = compute_max_tile_velocity(velocity_buffer);
  MemoryBuffer max_velocity = dilate_max_velocity(max_tile_velocity, settings_->fac);
  motion_blur(image_buffer,
              depth_buffer,
              velocity_buffer,
              &max_velocity,
              output,
              settings_->samples,
              settings_->fac);
}
|
|
|
|
void VectorBlurOperation::get_area_of_interest(const int /*input_idx*/,
|
|
const rcti & /*output_area*/,
|
|
rcti &r_input_area)
|
|
{
|
|
r_input_area = this->get_canvas();
|
|
}
|
|
|
|
} // namespace blender::compositor
|