2023-08-16 00:20:26 +10:00
|
|
|
/* SPDX-FileCopyrightText: 2023 Blender Authors
|
2023-05-31 16:19:06 +02:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
2022-10-11 13:22:52 +02:00
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
#include <cmath>
|
|
|
|
|
#include <limits>
|
|
|
|
|
|
|
|
|
|
#include "BLI_index_range.hh"
|
|
|
|
|
#include "BLI_math_base.hh"
|
2022-10-11 13:22:52 +02:00
|
|
|
#include "BLI_math_vector.hh"
|
2023-01-04 00:14:55 +01:00
|
|
|
#include "BLI_math_vector_types.hh"
|
2024-11-07 13:05:43 +02:00
|
|
|
#include "BLI_task.hh"
|
2022-10-11 13:22:52 +02:00
|
|
|
|
|
|
|
|
#include "MEM_guardedalloc.h"
|
|
|
|
|
|
2024-03-23 01:24:18 +01:00
|
|
|
#include "GPU_compute.hh"
|
|
|
|
|
#include "GPU_shader.hh"
|
2025-01-26 20:08:03 +01:00
|
|
|
#include "GPU_state.hh"
|
2024-03-23 01:24:18 +01:00
|
|
|
#include "GPU_texture.hh"
|
2025-02-12 15:59:45 +01:00
|
|
|
#include "GPU_texture_pool.hh"
|
2022-10-11 13:22:52 +02:00
|
|
|
|
|
|
|
|
#include "COM_context.hh"
|
2024-11-07 13:05:43 +02:00
|
|
|
#include "COM_result.hh"
|
2022-10-11 13:22:52 +02:00
|
|
|
|
2022-10-11 16:22:14 +02:00
|
|
|
#include "COM_algorithm_parallel_reduction.hh"
|
|
|
|
|
|
2024-12-17 11:39:04 +01:00
|
|
|
namespace blender::compositor {
|
2022-10-11 13:22:52 +02:00
|
|
|
|
|
|
|
|
/* Reduces the given texture into a single value and returns it. The return value should be freed
|
|
|
|
|
* by a call to MEM_freeN. The return value is either a pointer to a float, or a pointer to an
|
|
|
|
|
* array of floats that represents a vector. This depends on the given format, which should be
|
|
|
|
|
* compatible with the reduction shader.
|
|
|
|
|
*
|
|
|
|
|
* The given reduction shader should be bound when calling the function and the shader is expected
|
|
|
|
|
* to be derived from the compositor_parallel_reduction.glsl shader, see that file for more
|
|
|
|
|
* information. Also see the compositor_parallel_reduction_info.hh file for example shader
|
|
|
|
|
* definitions. */
|
2025-07-22 09:48:10 +02:00
|
|
|
static float *parallel_reduction_dispatch(blender::gpu::Texture *texture,
|
2025-08-11 09:34:28 +02:00
|
|
|
gpu::Shader *shader,
|
2025-07-22 14:58:54 +02:00
|
|
|
blender::gpu::TextureFormat format)
|
2022-10-11 13:22:52 +02:00
|
|
|
{
|
|
|
|
|
GPU_shader_uniform_1b(shader, "is_initial_reduction", true);
|
|
|
|
|
|
2025-07-22 09:48:10 +02:00
|
|
|
blender::gpu::Texture *texture_to_reduce = texture;
|
2022-10-11 13:22:52 +02:00
|
|
|
int2 size_to_reduce = int2(GPU_texture_width(texture), GPU_texture_height(texture));
|
|
|
|
|
|
|
|
|
|
/* Dispatch the reduction shader until the texture reduces to a single pixel. */
|
|
|
|
|
while (size_to_reduce != int2(1)) {
|
|
|
|
|
const int2 reduced_size = math::divide_ceil(size_to_reduce, int2(16));
|
2025-07-22 09:48:10 +02:00
|
|
|
blender::gpu::Texture *reduced_texture = gpu::TexturePool::get().acquire_texture(
|
2025-02-12 15:59:45 +01:00
|
|
|
reduced_size.x, reduced_size.y, format, GPU_TEXTURE_USAGE_GENERAL);
|
2022-10-11 13:22:52 +02:00
|
|
|
|
|
|
|
|
GPU_memory_barrier(GPU_BARRIER_TEXTURE_FETCH);
|
2023-02-12 22:52:27 +01:00
|
|
|
const int texture_image_unit = GPU_shader_get_sampler_binding(shader, "input_tx");
|
2022-10-11 13:22:52 +02:00
|
|
|
GPU_texture_bind(texture_to_reduce, texture_image_unit);
|
|
|
|
|
|
2023-02-12 22:52:27 +01:00
|
|
|
const int image_unit = GPU_shader_get_sampler_binding(shader, "output_img");
|
2022-10-11 13:22:52 +02:00
|
|
|
GPU_texture_image_bind(reduced_texture, image_unit);
|
|
|
|
|
|
|
|
|
|
GPU_compute_dispatch(shader, reduced_size.x, reduced_size.y, 1);
|
|
|
|
|
|
|
|
|
|
GPU_texture_image_unbind(reduced_texture);
|
|
|
|
|
GPU_texture_unbind(texture_to_reduce);
|
|
|
|
|
|
|
|
|
|
/* Release the input texture only if it is not the source texture, since the source texture is
|
|
|
|
|
* not acquired or owned by the function. */
|
|
|
|
|
if (texture_to_reduce != texture) {
|
2025-02-12 15:59:45 +01:00
|
|
|
gpu::TexturePool::get().release_texture(texture_to_reduce);
|
2022-10-11 13:22:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
texture_to_reduce = reduced_texture;
|
|
|
|
|
size_to_reduce = reduced_size;
|
|
|
|
|
|
|
|
|
|
GPU_shader_uniform_1b(shader, "is_initial_reduction", false);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
GPU_memory_barrier(GPU_BARRIER_TEXTURE_UPDATE);
|
|
|
|
|
float *pixel = static_cast<float *>(GPU_texture_read(texture_to_reduce, GPU_DATA_FLOAT, 0));
|
|
|
|
|
|
|
|
|
|
/* Release the final texture only if it is not the source texture, since the source texture is
|
|
|
|
|
* not acquired or owned by the function. */
|
|
|
|
|
if (texture_to_reduce != texture) {
|
2025-02-12 15:59:45 +01:00
|
|
|
gpu::TexturePool::get().release_texture(texture_to_reduce);
|
2022-10-11 13:22:52 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return pixel;
|
|
|
|
|
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
/* Reduces the given function in parallel over the given 2D range, the reduction function should
|
|
|
|
|
* have the given identity value. The given function gets as arguments the texel coordinates of the
|
|
|
|
|
* element of the range as well as a reference to the value where the result should be accumulated,
|
|
|
|
|
* while the reduction function gets a reference to two values and returns their reduction. */
|
|
|
|
|
template<typename Value, typename Function, typename Reduction>
|
|
|
|
|
static Value parallel_reduce(const int2 range,
|
|
|
|
|
const Value &identity,
|
|
|
|
|
const Function &function,
|
|
|
|
|
const Reduction &reduction)
|
|
|
|
|
{
|
|
|
|
|
return threading::parallel_reduce(
|
|
|
|
|
IndexRange(range.y),
|
|
|
|
|
64,
|
|
|
|
|
identity,
|
|
|
|
|
[&](const IndexRange sub_y_range, const Value &initial_value) {
|
|
|
|
|
Value result = initial_value;
|
|
|
|
|
for (const int64_t y : sub_y_range) {
|
|
|
|
|
for (const int64_t x : IndexRange(range.x)) {
|
|
|
|
|
function(int2(x, y), result);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
},
|
|
|
|
|
reduction);
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-11 13:22:52 +02:00
|
|
|
/* --------------------------------------------------------------------
|
|
|
|
|
* Sum Reductions.
|
|
|
|
|
*/
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
/* Runs the compositor_sum_red parallel reduction shader on the given result and returns the
 * single float value that it reduces to. */
static float sum_red_gpu(Context &context, const Result &result)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_sum_red", ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  /* The intermediate reduction textures store full precision floats. */
  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_pixel = parallel_reduction_dispatch(result, reduction_shader, reduction_format);

  /* Copy the value out before freeing the buffer returned by the dispatch. */
  const float red_sum = reduced_pixel[0];
  MEM_freeN(reduced_pixel);

  GPU_shader_unbind();
  return red_sum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float sum_red_cpu(const Result &result)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
0.0,
|
|
|
|
|
[&](const int2 texel, double &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
accumulated_value += result.load_pixel<float4>(texel).x;
|
2024-11-07 13:05:43 +02:00
|
|
|
},
|
|
|
|
|
[&](const double &a, const double &b) { return a + b; }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Computes the red sum reduction of the given result, using the GPU implementation if the
 * context uses the GPU and the CPU implementation otherwise. */
float sum_red(Context &context, const Result &result)
{
  return context.use_gpu() ? sum_red_gpu(context, result) : sum_red_cpu(result);
}
|
|
|
|
|
|
|
|
|
|
/* Runs the compositor_sum_green parallel reduction shader on the given result and returns the
 * single float value that it reduces to. */
static float sum_green_gpu(Context &context, const Result &result)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_sum_green",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  /* The intermediate reduction textures store full precision floats. */
  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_pixel = parallel_reduction_dispatch(result, reduction_shader, reduction_format);

  /* Copy the value out before freeing the buffer returned by the dispatch. */
  const float green_sum = reduced_pixel[0];
  MEM_freeN(reduced_pixel);

  GPU_shader_unbind();
  return green_sum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float sum_green_cpu(const Result &result)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
0.0,
|
|
|
|
|
[&](const int2 texel, double &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
accumulated_value += result.load_pixel<float4>(texel).y;
|
2024-11-07 13:05:43 +02:00
|
|
|
},
|
|
|
|
|
[&](const double &a, const double &b) { return a + b; }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Computes the green sum reduction of the given result, using the GPU implementation if the
 * context uses the GPU and the CPU implementation otherwise. */
float sum_green(Context &context, const Result &result)
{
  return context.use_gpu() ? sum_green_gpu(context, result) : sum_green_cpu(result);
}
|
|
|
|
|
|
|
|
|
|
/* Runs the compositor_sum_blue parallel reduction shader on the given result and returns the
 * single float value that it reduces to. */
static float sum_blue_gpu(Context &context, const Result &result)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_sum_blue",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  /* The intermediate reduction textures store full precision floats. */
  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_pixel = parallel_reduction_dispatch(result, reduction_shader, reduction_format);

  /* Copy the value out before freeing the buffer returned by the dispatch. */
  const float blue_sum = reduced_pixel[0];
  MEM_freeN(reduced_pixel);

  GPU_shader_unbind();
  return blue_sum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float sum_blue_cpu(const Result &result)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
0.0,
|
|
|
|
|
[&](const int2 texel, double &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
accumulated_value += result.load_pixel<float4>(texel).z;
|
2024-11-07 13:05:43 +02:00
|
|
|
},
|
|
|
|
|
[&](const double &a, const double &b) { return a + b; }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Computes the blue sum reduction of the given result, using the GPU implementation if the
 * context uses the GPU and the CPU implementation otherwise. */
float sum_blue(Context &context, const Result &result)
{
  return context.use_gpu() ? sum_blue_gpu(context, result) : sum_blue_cpu(result);
}
|
|
|
|
|
|
|
|
|
|
/* Runs the compositor_sum_luminance parallel reduction shader on the given result and returns
 * the single float value that it reduces to. The given coefficients are passed to the shader
 * through its "luminance_coefficients" uniform. */
static float sum_luminance_gpu(Context &context,
                               const Result &result,
                               const float3 &luminance_coefficients)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_sum_luminance",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_3fv(reduction_shader, "luminance_coefficients", luminance_coefficients);

  /* The intermediate reduction textures store full precision floats. */
  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_pixel = parallel_reduction_dispatch(result, reduction_shader, reduction_format);

  /* Copy the value out before freeing the buffer returned by the dispatch. */
  const float luminance_sum = reduced_pixel[0];
  MEM_freeN(reduced_pixel);

  GPU_shader_unbind();
  return luminance_sum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float sum_luminance_cpu(const Result &result, const float3 &luminance_coefficients)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
0.0,
|
|
|
|
|
[&](const int2 texel, double &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
accumulated_value += math::dot(result.load_pixel<float4>(texel).xyz(),
|
|
|
|
|
luminance_coefficients);
|
2024-11-07 13:05:43 +02:00
|
|
|
},
|
|
|
|
|
[&](const double &a, const double &b) { return a + b; }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Computes the luminance sum reduction of the given result using the given luminance
 * coefficients, using the GPU implementation if the context uses the GPU and the CPU
 * implementation otherwise. */
float sum_luminance(Context &context, const Result &result, const float3 &luminance_coefficients)
{
  return context.use_gpu() ? sum_luminance_gpu(context, result, luminance_coefficients) :
                             sum_luminance_cpu(result, luminance_coefficients);
}
|
|
|
|
|
|
|
|
|
|
/* Runs the compositor_sum_log_luminance parallel reduction shader on the given result and
 * returns the single float value that it reduces to. The given coefficients are passed to the
 * shader through its "luminance_coefficients" uniform. */
static float sum_log_luminance_gpu(Context &context,
                                   const Result &result,
                                   const float3 &luminance_coefficients)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_sum_log_luminance",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_3fv(reduction_shader, "luminance_coefficients", luminance_coefficients);

  /* The intermediate reduction textures store full precision floats. */
  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_pixel = parallel_reduction_dispatch(result, reduction_shader, reduction_format);

  /* Copy the value out before freeing the buffer returned by the dispatch. */
  const float log_luminance_sum = reduced_pixel[0];
  MEM_freeN(reduced_pixel);

  GPU_shader_unbind();
  return log_luminance_sum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float sum_log_luminance_cpu(const Result &result, const float3 &luminance_coefficients)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
0.0,
|
|
|
|
|
[&](const int2 texel, double &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
const float luminance = math::dot(result.load_pixel<float4>(texel).xyz(),
|
|
|
|
|
luminance_coefficients);
|
2024-11-07 13:05:43 +02:00
|
|
|
accumulated_value += std::log(math::max(luminance, 1e-5f));
|
|
|
|
|
},
|
|
|
|
|
[&](const double &a, const double &b) { return a + b; }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Computes the log luminance sum reduction of the given result using the given luminance
 * coefficients, using the GPU implementation if the context uses the GPU and the CPU
 * implementation otherwise. */
float sum_log_luminance(Context &context,
                        const Result &result,
                        const float3 &luminance_coefficients)
{
  return context.use_gpu() ? sum_log_luminance_gpu(context, result, luminance_coefficients) :
                             sum_log_luminance_cpu(result, luminance_coefficients);
}
|
|
|
|
|
|
|
|
|
|
/* Runs the compositor_sum_color parallel reduction shader on the given result and returns the
 * four component value that it reduces to. */
static float4 sum_color_gpu(Context &context, const Result &result)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_sum_color",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  /* The intermediate reduction textures store full precision colors. */
  const auto reduction_format = Result::gpu_texture_format(ResultType::Color,
                                                           ResultPrecision::Full);
  float *reduced_pixel = parallel_reduction_dispatch(result, reduction_shader, reduction_format);

  /* Copy the components out before freeing the buffer returned by the dispatch. */
  const float4 color_sum = float4(reduced_pixel);
  MEM_freeN(reduced_pixel);

  GPU_shader_unbind();
  return color_sum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
/* Sums every pixel of the given result component-wise on the CPU. The sum is accumulated in
 * double precision and only the final total is converted to float. */
static float4 sum_color_cpu(const Result &result)
{
  const auto add_texel = [&](const int2 texel, double4 &running_total) {
    running_total += double4(result.load_pixel<float4>(texel));
  };
  const auto combine = [](const double4 &lhs, const double4 &rhs) { return lhs + rhs; };

  const double4 total = parallel_reduce(result.domain().size, double4(0.0), add_texel, combine);
  return float4(total);
}
|
|
|
|
|
|
|
|
|
|
/* Computes the color sum reduction of the given result, using the GPU implementation if the
 * context uses the GPU and the CPU implementation otherwise. */
float4 sum_color(Context &context, const Result &result)
{
  return context.use_gpu() ? sum_color_gpu(context, result) : sum_color_cpu(result);
}
|
|
|
|
|
|
2022-10-11 13:22:52 +02:00
|
|
|
/* --------------------------------------------------------------------
|
|
|
|
|
* Sum Of Squared Difference Reductions.
|
|
|
|
|
*/
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
/* Runs the compositor_sum_red_squared_difference parallel reduction shader on the given result
 * and returns the single float value that it reduces to. The given subtrahend is passed to the
 * shader through its "subtrahend" uniform. */
static float sum_red_squared_difference_gpu(Context &context,
                                            const Result &result,
                                            const float subtrahend)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_sum_red_squared_difference",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_1f(reduction_shader, "subtrahend", subtrahend);

  /* The intermediate reduction textures store full precision floats. */
  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_pixel = parallel_reduction_dispatch(result, reduction_shader, reduction_format);

  /* Copy the value out before freeing the buffer returned by the dispatch. */
  const float squared_difference_sum = reduced_pixel[0];
  MEM_freeN(reduced_pixel);

  GPU_shader_unbind();
  return squared_difference_sum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float sum_red_squared_difference_cpu(const Result &result, const float subtrahend)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
0.0,
|
|
|
|
|
[&](const int2 texel, double &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
accumulated_value += math::square(result.load_pixel<float4>(texel).x - subtrahend);
|
2024-11-07 13:05:43 +02:00
|
|
|
},
|
|
|
|
|
[&](const double &a, const double &b) { return a + b; }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Computes the red squared difference sum reduction of the given result with respect to the
 * given subtrahend, using the GPU implementation if the context uses the GPU and the CPU
 * implementation otherwise. */
float sum_red_squared_difference(Context &context, const Result &result, const float subtrahend)
{
  return context.use_gpu() ? sum_red_squared_difference_gpu(context, result, subtrahend) :
                             sum_red_squared_difference_cpu(result, subtrahend);
}
|
|
|
|
|
|
|
|
|
|
/* Runs the compositor_sum_green_squared_difference parallel reduction shader on the given result
 * and returns the single float value that it reduces to. The given subtrahend is passed to the
 * shader through its "subtrahend" uniform. */
static float sum_green_squared_difference_gpu(Context &context,
                                              const Result &result,
                                              const float subtrahend)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_sum_green_squared_difference",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_1f(reduction_shader, "subtrahend", subtrahend);

  /* The intermediate reduction textures store full precision floats. */
  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_pixel = parallel_reduction_dispatch(result, reduction_shader, reduction_format);

  /* Copy the value out before freeing the buffer returned by the dispatch. */
  const float squared_difference_sum = reduced_pixel[0];
  MEM_freeN(reduced_pixel);

  GPU_shader_unbind();
  return squared_difference_sum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float sum_green_squared_difference_cpu(const Result &result, const float subtrahend)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
0.0,
|
|
|
|
|
[&](const int2 texel, double &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
accumulated_value += math::square(result.load_pixel<float4>(texel).y - subtrahend);
|
2024-11-07 13:05:43 +02:00
|
|
|
},
|
|
|
|
|
[&](const double &a, const double &b) { return a + b; }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Dispatches to the GPU or the CPU implementation depending on whether the context reports GPU
 * usage. */
float sum_green_squared_difference(Context &context, const Result &result, const float subtrahend)
{
  return context.use_gpu() ? sum_green_squared_difference_gpu(context, result, subtrahend) :
                             sum_green_squared_difference_cpu(result, subtrahend);
}
|
|
|
|
|
|
|
|
|
|
/* GPU implementation of sum_blue_squared_difference. Binds the
 * compositor_sum_blue_squared_difference shader requested with ResultPrecision::Full, sets the
 * subtrahend uniform, and runs the reduction through parallel_reduction_dispatch. The buffer
 * returned by the dispatch is read, then released with MEM_freeN before the shader is unbound. */
static float sum_blue_squared_difference_gpu(Context &context,
                                             const Result &result,
                                             const float subtrahend)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_sum_blue_squared_difference",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_1f(reduction_shader, "subtrahend", subtrahend);

  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_buffer = parallel_reduction_dispatch(result, reduction_shader, reduction_format);
  const float squared_difference_sum = reduced_buffer[0];

  MEM_freeN(reduced_buffer);
  GPU_shader_unbind();

  return squared_difference_sum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float sum_blue_squared_difference_cpu(const Result &result, const float subtrahend)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
0.0,
|
|
|
|
|
[&](const int2 texel, double &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
accumulated_value += math::square(result.load_pixel<float4>(texel).z - subtrahend);
|
2024-11-07 13:05:43 +02:00
|
|
|
},
|
|
|
|
|
[&](const double &a, const double &b) { return a + b; }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Dispatches to the GPU or the CPU implementation depending on whether the context reports GPU
 * usage. */
float sum_blue_squared_difference(Context &context, const Result &result, const float subtrahend)
{
  return context.use_gpu() ? sum_blue_squared_difference_gpu(context, result, subtrahend) :
                             sum_blue_squared_difference_cpu(result, subtrahend);
}
|
|
|
|
|
|
|
|
|
|
/* GPU implementation of sum_luminance_squared_difference. Binds the
 * compositor_sum_luminance_squared_difference shader requested with ResultPrecision::Full, sets
 * the luminance_coefficients and subtrahend uniforms, and runs the reduction through
 * parallel_reduction_dispatch. The buffer returned by the dispatch is read, then released with
 * MEM_freeN before the shader is unbound. */
static float sum_luminance_squared_difference_gpu(Context &context,
                                                  const Result &result,
                                                  const float3 &luminance_coefficients,
                                                  const float subtrahend)
{
  gpu::Shader *reduction_shader = context.get_shader(
      "compositor_sum_luminance_squared_difference", ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_3fv(reduction_shader, "luminance_coefficients", luminance_coefficients);
  GPU_shader_uniform_1f(reduction_shader, "subtrahend", subtrahend);

  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_buffer = parallel_reduction_dispatch(result, reduction_shader, reduction_format);
  const float squared_difference_sum = reduced_buffer[0];

  MEM_freeN(reduced_buffer);
  GPU_shader_unbind();

  return squared_difference_sum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float sum_luminance_squared_difference_cpu(const Result &result,
|
|
|
|
|
const float3 &luminance_coefficients,
|
|
|
|
|
const float subtrahend)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
0.0,
|
|
|
|
|
[&](const int2 texel, double &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
const float luminance = math::dot(result.load_pixel<float4>(texel).xyz(),
|
|
|
|
|
luminance_coefficients);
|
2024-11-07 13:05:43 +02:00
|
|
|
accumulated_value += math::square(luminance - subtrahend);
|
|
|
|
|
},
|
|
|
|
|
[&](const double &a, const double &b) { return a + b; }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Dispatches to the GPU or the CPU implementation depending on whether the context reports GPU
 * usage. */
float sum_luminance_squared_difference(Context &context,
                                       const Result &result,
                                       const float3 &luminance_coefficients,
                                       const float subtrahend)
{
  const bool run_on_gpu = context.use_gpu();
  if (!run_on_gpu) {
    return sum_luminance_squared_difference_cpu(result, luminance_coefficients, subtrahend);
  }

  return sum_luminance_squared_difference_gpu(
      context, result, luminance_coefficients, subtrahend);
}
|
|
|
|
|
|
2022-10-20 15:02:41 +02:00
|
|
|
/* --------------------------------------------------------------------
|
|
|
|
|
* Maximum Reductions.
|
|
|
|
|
*/
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
/* GPU implementation of maximum_luminance. Binds the compositor_maximum_luminance shader
 * requested with ResultPrecision::Full, sets the luminance_coefficients uniform, and runs the
 * reduction through parallel_reduction_dispatch. The buffer returned by the dispatch is read,
 * then released with MEM_freeN before the shader is unbound. */
static float maximum_luminance_gpu(Context &context,
                                   const Result &result,
                                   const float3 &luminance_coefficients)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_maximum_luminance",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_3fv(reduction_shader, "luminance_coefficients", luminance_coefficients);

  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_buffer = parallel_reduction_dispatch(result, reduction_shader, reduction_format);
  const float reduced_maximum = reduced_buffer[0];

  MEM_freeN(reduced_buffer);
  GPU_shader_unbind();

  return reduced_maximum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float maximum_luminance_cpu(const Result &result, const float3 &luminance_coefficients)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
std::numeric_limits<float>::lowest(),
|
|
|
|
|
[&](const int2 texel, float &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
const float luminance = math::dot(result.load_pixel<float4>(texel).xyz(),
|
|
|
|
|
luminance_coefficients);
|
2024-11-07 13:05:43 +02:00
|
|
|
accumulated_value = math::max(accumulated_value, luminance);
|
|
|
|
|
},
|
|
|
|
|
[&](const float &a, const float &b) { return math::max(a, b); }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Dispatches to the GPU or the CPU implementation depending on whether the context reports GPU
 * usage. */
float maximum_luminance(Context &context,
                        const Result &result,
                        const float3 &luminance_coefficients)
{
  return context.use_gpu() ? maximum_luminance_gpu(context, result, luminance_coefficients) :
                             maximum_luminance_cpu(result, luminance_coefficients);
}
|
|
|
|
|
|
|
|
|
|
/* GPU implementation of maximum_float. Binds the compositor_maximum_float shader requested with
 * ResultPrecision::Full and runs the reduction through parallel_reduction_dispatch. The buffer
 * returned by the dispatch is read, then released with MEM_freeN before the shader is unbound. */
static float maximum_float_gpu(Context &context, const Result &result)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_maximum_float",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_buffer = parallel_reduction_dispatch(result, reduction_shader, reduction_format);
  const float reduced_maximum = reduced_buffer[0];

  MEM_freeN(reduced_buffer);
  GPU_shader_unbind();

  return reduced_maximum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float maximum_float_cpu(const Result &result)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
std::numeric_limits<float>::lowest(),
|
|
|
|
|
[&](const int2 texel, float &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
accumulated_value = math::max(accumulated_value, result.load_pixel<float>(texel));
|
2024-11-07 13:05:43 +02:00
|
|
|
},
|
|
|
|
|
[&](const float &a, const float &b) { return math::max(a, b); }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Dispatches to the GPU or the CPU implementation depending on whether the context reports GPU
 * usage. */
float maximum_float(Context &context, const Result &result)
{
  return context.use_gpu() ? maximum_float_gpu(context, result) : maximum_float_cpu(result);
}
|
|
|
|
|
|
2025-06-23 14:34:37 +02:00
|
|
|
/* GPU implementation of maximum_float2. Binds the compositor_maximum_float2 shader requested
 * with ResultPrecision::Full and runs the reduction through parallel_reduction_dispatch using
 * the Float2 result type. A float2 is constructed from the returned float buffer before the
 * buffer is released with MEM_freeN and the shader is unbound. */
static float2 maximum_float2_gpu(Context &context, const Result &result)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_maximum_float2",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  const auto reduction_format = Result::gpu_texture_format(ResultType::Float2,
                                                           ResultPrecision::Full);
  float *reduced_buffer = parallel_reduction_dispatch(result, reduction_shader, reduction_format);
  /* Copy the value out of the buffer before it is freed. */
  const float2 reduced_maximum = reduced_buffer;

  MEM_freeN(reduced_buffer);
  GPU_shader_unbind();

  return reduced_maximum;
}
|
|
|
|
|
|
2025-06-23 14:34:37 +02:00
|
|
|
/* CPU implementation of maximum_float2. Starting from a float2 filled with the lowest finite
 * float, every texel in the domain of the result is loaded as a float2 and combined with the
 * accumulator using math::max. */
static float2 maximum_float2_cpu(const Result &result)
{
  const auto accumulate_texel = [&](const int2 texel, float2 &running_maximum) {
    running_maximum = math::max(running_maximum, result.load_pixel<float2>(texel));
  };
  const auto combine_maxima = [](const float2 &a, const float2 &b) { return math::max(a, b); };

  return parallel_reduce(result.domain().size,
                         float2(std::numeric_limits<float>::lowest()),
                         accumulate_texel,
                         combine_maxima);
}
|
|
|
|
|
|
2025-06-23 14:34:37 +02:00
|
|
|
/* Dispatches to the GPU or the CPU implementation depending on whether the context reports GPU
 * usage. */
float2 maximum_float2(Context &context, const Result &result)
{
  return context.use_gpu() ? maximum_float2_gpu(context, result) : maximum_float2_cpu(result);
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
/* GPU implementation of maximum_float_in_range. Binds the compositor_maximum_float_in_range
 * shader requested with ResultPrecision::Full, sets the lower_bound and upper_bound uniforms,
 * and runs the reduction through parallel_reduction_dispatch. The buffer returned by the
 * dispatch is read, then released with MEM_freeN before the shader is unbound. */
static float maximum_float_in_range_gpu(Context &context,
                                        const Result &result,
                                        const float lower_bound,
                                        const float upper_bound)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_maximum_float_in_range",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_1f(reduction_shader, "lower_bound", lower_bound);
  GPU_shader_uniform_1f(reduction_shader, "upper_bound", upper_bound);

  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_buffer = parallel_reduction_dispatch(result, reduction_shader, reduction_format);
  const float reduced_maximum = reduced_buffer[0];

  MEM_freeN(reduced_buffer);
  GPU_shader_unbind();

  return reduced_maximum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float maximum_float_in_range_cpu(const Result &result,
|
|
|
|
|
const float lower_bound,
|
|
|
|
|
const float upper_bound)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
lower_bound,
|
|
|
|
|
[&](const int2 texel, float &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
const float value = result.load_pixel<float>(texel);
|
2024-11-07 13:05:43 +02:00
|
|
|
if ((value <= upper_bound) && (value >= lower_bound)) {
|
|
|
|
|
accumulated_value = math::max(accumulated_value, value);
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
[&](const float &a, const float &b) { return math::max(a, b); }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Dispatches to the GPU or the CPU implementation depending on whether the context reports GPU
 * usage. */
float maximum_float_in_range(Context &context,
                             const Result &result,
                             const float lower_bound,
                             const float upper_bound)
{
  const bool run_on_gpu = context.use_gpu();
  if (!run_on_gpu) {
    return maximum_float_in_range_cpu(result, lower_bound, upper_bound);
  }

  return maximum_float_in_range_gpu(context, result, lower_bound, upper_bound);
}
|
|
|
|
|
|
2022-10-20 15:02:41 +02:00
|
|
|
/* --------------------------------------------------------------------
|
|
|
|
|
* Minimum Reductions.
|
|
|
|
|
*/
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
/* GPU implementation of minimum_luminance. Binds the compositor_minimum_luminance shader
 * requested with ResultPrecision::Full, sets the luminance_coefficients uniform, and runs the
 * reduction through parallel_reduction_dispatch. The buffer returned by the dispatch is read,
 * then released with MEM_freeN before the shader is unbound. */
static float minimum_luminance_gpu(Context &context,
                                   const Result &result,
                                   const float3 &luminance_coefficients)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_minimum_luminance",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_3fv(reduction_shader, "luminance_coefficients", luminance_coefficients);

  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_buffer = parallel_reduction_dispatch(result, reduction_shader, reduction_format);
  const float reduced_minimum = reduced_buffer[0];

  MEM_freeN(reduced_buffer);
  GPU_shader_unbind();

  return reduced_minimum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float minimum_luminance_cpu(const Result &result, const float3 &luminance_coefficients)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
std::numeric_limits<float>::max(),
|
|
|
|
|
[&](const int2 texel, float &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
const float luminance = math::dot(result.load_pixel<float4>(texel).xyz(),
|
|
|
|
|
luminance_coefficients);
|
2024-11-07 13:05:43 +02:00
|
|
|
accumulated_value = math::min(accumulated_value, luminance);
|
|
|
|
|
},
|
|
|
|
|
[&](const float &a, const float &b) { return math::min(a, b); }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Dispatches to the GPU or the CPU implementation depending on whether the context reports GPU
 * usage. */
float minimum_luminance(Context &context,
                        const Result &result,
                        const float3 &luminance_coefficients)
{
  return context.use_gpu() ? minimum_luminance_gpu(context, result, luminance_coefficients) :
                             minimum_luminance_cpu(result, luminance_coefficients);
}
|
|
|
|
|
|
|
|
|
|
/* GPU implementation of minimum_float. Binds the compositor_minimum_float shader requested with
 * ResultPrecision::Full and runs the reduction through parallel_reduction_dispatch. The buffer
 * returned by the dispatch is read, then released with MEM_freeN before the shader is unbound. */
static float minimum_float_gpu(Context &context, const Result &result)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_minimum_float",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);

  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_buffer = parallel_reduction_dispatch(result, reduction_shader, reduction_format);
  const float reduced_minimum = reduced_buffer[0];

  MEM_freeN(reduced_buffer);
  GPU_shader_unbind();

  return reduced_minimum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float minimum_float_cpu(const Result &result)
|
|
|
|
|
{
|
|
|
|
|
return float(parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
std::numeric_limits<float>::max(),
|
|
|
|
|
[&](const int2 texel, float &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
accumulated_value = math::min(accumulated_value, result.load_pixel<float>(texel));
|
2024-11-07 13:05:43 +02:00
|
|
|
},
|
|
|
|
|
[&](const float &a, const float &b) { return math::min(a, b); }));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Dispatches to the GPU or the CPU implementation depending on whether the context reports GPU
 * usage. */
float minimum_float(Context &context, const Result &result)
{
  return context.use_gpu() ? minimum_float_gpu(context, result) : minimum_float_cpu(result);
}
|
|
|
|
|
|
|
|
|
|
/* GPU implementation of minimum_float_in_range. Binds the compositor_minimum_float_in_range
 * shader requested with ResultPrecision::Full, sets the lower_bound and upper_bound uniforms,
 * and runs the reduction through parallel_reduction_dispatch. The buffer returned by the
 * dispatch is read, then released with MEM_freeN before the shader is unbound. */
static float minimum_float_in_range_gpu(Context &context,
                                        const Result &result,
                                        const float lower_bound,
                                        const float upper_bound)
{
  gpu::Shader *reduction_shader = context.get_shader("compositor_minimum_float_in_range",
                                                     ResultPrecision::Full);
  GPU_shader_bind(reduction_shader);
  GPU_shader_uniform_1f(reduction_shader, "lower_bound", lower_bound);
  GPU_shader_uniform_1f(reduction_shader, "upper_bound", upper_bound);

  const auto reduction_format = Result::gpu_texture_format(ResultType::Float,
                                                           ResultPrecision::Full);
  float *reduced_buffer = parallel_reduction_dispatch(result, reduction_shader, reduction_format);
  const float reduced_minimum = reduced_buffer[0];

  MEM_freeN(reduced_buffer);
  GPU_shader_unbind();

  return reduced_minimum;
}
|
|
|
|
|
|
2024-11-07 13:05:43 +02:00
|
|
|
static float minimum_float_in_range_cpu(const Result &result,
|
|
|
|
|
const float lower_bound,
|
|
|
|
|
const float upper_bound)
|
|
|
|
|
{
|
|
|
|
|
return parallel_reduce(
|
|
|
|
|
result.domain().size,
|
|
|
|
|
upper_bound,
|
|
|
|
|
[&](const int2 texel, float &accumulated_value) {
|
2024-12-05 16:55:06 +01:00
|
|
|
const float value = result.load_pixel<float>(texel);
|
2024-11-07 13:05:43 +02:00
|
|
|
if ((value <= upper_bound) && (value >= lower_bound)) {
|
|
|
|
|
accumulated_value = math::min(accumulated_value, value);
|
|
|
|
|
}
|
|
|
|
|
},
|
|
|
|
|
[&](const float &a, const float &b) { return math::min(a, b); });
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Dispatches to the GPU or the CPU implementation depending on whether the context reports GPU
 * usage. */
float minimum_float_in_range(Context &context,
                             const Result &result,
                             const float lower_bound,
                             const float upper_bound)
{
  const bool run_on_gpu = context.use_gpu();
  if (!run_on_gpu) {
    return minimum_float_in_range_cpu(result, lower_bound, upper_bound);
  }

  return minimum_float_in_range_gpu(context, result, lower_bound, upper_bound);
}
|
|
|
|
|
|
2024-12-17 11:39:04 +01:00
|
|
|
} // namespace blender::compositor
|