diff --git a/source/blender/compositor/shaders/CMakeLists.txt b/source/blender/compositor/shaders/CMakeLists.txt index 6552cb38b0e..ee052664dc8 100644 --- a/source/blender/compositor/shaders/CMakeLists.txt +++ b/source/blender/compositor/shaders/CMakeLists.txt @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2024 Blender Authors # # SPDX-License-Identifier: GPL-2.0-or-later + set(INC_GLSL . .. @@ -11,6 +12,7 @@ set(INC_GLSL ../../gpu/shaders/common ../../gpu/shaders/infos ) + set(SRC_GLSL_COMP # TODO: Port to C++ compilation # compositor_alpha_crop.glsl @@ -86,7 +88,7 @@ set(SRC_GLSL_COMP # compositor_movie_distortion.glsl # compositor_normalize.glsl # compositor_pad.glsl -# compositor_parallel_reduction.glsl + compositor_parallel_reduction.glsl # compositor_pixelate.glsl # compositor_plane_deform_anisotropic.glsl # compositor_plane_deform_mask.glsl @@ -123,14 +125,17 @@ set(SRC_GLSL_COMP # compositor_z_combine_simple_depth.glsl # compositor_z_combine_simple_image.glsl ) + set(SRC_GLSL_LIB library/gpu_shader_compositor_texture_utilities.glsl ) + # Compile shaders with shader code. if(WITH_GPU_SHADER_CPP_COMPILATION) with_shader_cpp_compilation_config() - #compile_sources_as_cpp(compositor_cpp_shaders_comp "${SRC_GLSL_COMP}" "GPU_COMPUTE_SHADER") + + compile_sources_as_cpp(compositor_cpp_shaders_comp "${SRC_GLSL_COMP}" "GPU_COMPUTE_SHADER") # Only enable to make sure they compile on their own. # Otherwise it creates a warning about `pragma once`. 
compile_sources_as_cpp(compositor_cpp_shaders_lib "${SRC_GLSL_LIB}" "GPU_LIBRARY_SHADER") -endif() \ No newline at end of file +endif() diff --git a/source/blender/compositor/shaders/compositor_parallel_reduction.glsl b/source/blender/compositor/shaders/compositor_parallel_reduction.glsl index e0c2bd4dde1..e7b9d153542 100644 --- a/source/blender/compositor/shaders/compositor_parallel_reduction.glsl +++ b/source/blender/compositor/shaders/compositor_parallel_reduction.glsl @@ -48,15 +48,229 @@ * expected to define the TYPE macro to be a float or a vec4, depending on the type of data being * reduced. */ +#include "infos/compositor_parallel_reduction_infos.hh" + +COMPUTE_SHADER_CREATE_INFO(compositor_parallel_reduction_shared) +COMPUTE_SHADER_CREATE_INFO(compositor_parallel_reduction_output_float4) + #include "gpu_shader_compositor_texture_utilities.glsl" +#include "gpu_shader_math_base_lib.glsl" #include "gpu_shader_math_vector_lib.glsl" #include "gpu_shader_math_vector_reduce_lib.glsl" #include "gpu_shader_utildefines_lib.glsl" #define reduction_size (gl_WorkGroupSize.x * gl_WorkGroupSize.y) -shared TYPE reduction_data[reduction_size]; -void main() +/* Operations. Out-of-bounds invocations store identity(), so it must not affect reduce(). */ + +template struct Min { + static T identity() + { + return T(FLT_MAX); /* Neutral element of min(). */ + } + + static T initialize(T value) + { + return value; + } + + static T reduce(T lhs, T rhs) + { + return min(lhs, rhs); + } +}; +template struct Min; + +template struct Max { + static T identity() + { + return T(0); + } + + static T initialize(T value) + { + return value; + } + + static T reduce(T lhs, T rhs) + { + return max(lhs, rhs); + } +}; +template struct Max; +template struct Max; +template struct Max; + +template struct Sum { + static T identity() + { + return T(0); + } + + static T initialize(T value) + { + return value; + } + + static T reduce(T lhs, T rhs) + { + return lhs + rhs; + } +}; +template struct Sum; +template struct Sum; + +template struct MaxVelocity { + static T identity() + { + return T(0); + } + + static T 
initialize(T value) + { + return value; + } + + static T reduce(T lhs, T rhs) + { + return float4(dot(lhs.xy, lhs.xy) > dot(rhs.xy, rhs.xy) ? lhs.xy : rhs.xy, + dot(lhs.zw, lhs.zw) > dot(rhs.zw, rhs.zw) ? lhs.zw : rhs.zw); + } +}; +template struct MaxVelocity; + +template struct MaxInRange { + static T identity() + { + return push_constant_get(compositor_maximum_float_in_range, lower_bound); /* Neutral. */ + } + + static T initialize(T value) + { + float max = push_constant_get(compositor_maximum_float_in_range, upper_bound); + float min = push_constant_get(compositor_maximum_float_in_range, lower_bound); + return ((value <= max) && (value >= min)) ? value : min; + } + + static T reduce(T lhs, T rhs) + { + float max = push_constant_get(compositor_maximum_float_in_range, upper_bound); + return ((rhs > lhs) && (rhs <= max)) ? rhs : lhs; + } +}; +template struct MaxInRange; + +template struct MinInRange { + static T identity() + { + return push_constant_get(compositor_minimum_float_in_range, upper_bound); /* Neutral. */ + } + + static T initialize(T value) + { + float max = push_constant_get(compositor_minimum_float_in_range, upper_bound); + float min = push_constant_get(compositor_minimum_float_in_range, lower_bound); + return ((value <= max) && (value >= min)) ? value : max; + } + + static T reduce(T lhs, T rhs) + { + float min = push_constant_get(compositor_minimum_float_in_range, lower_bound); + return ((rhs < lhs) && (rhs >= min)) ? 
rhs : lhs; + } +}; +template struct MinInRange; + +template struct SumSquareDifference { + static T identity() + { + return T(0); + } + + static T initialize(T value) + { + float sub = push_constant_get(compositor_sum_squared_difference_float_shared, subtrahend); + return square(value - sub); + } + + static T reduce(T lhs, T rhs) + { + return lhs + rhs; + } +}; +template struct SumSquareDifference; + +/* ChannelMix */ +struct ChannelR {}; +struct ChannelG {}; +struct ChannelB {}; +struct ChannelRG {}; +struct ChannelRGBA {}; +struct ChannelLuma {}; +struct ChannelLogLuma {}; +struct ChannelMax {}; + +template T channel_mix(float4 value) +{ + return value; +} +template float4 channel_mix(float4); +/* clang-format off */ +template<> float channel_mix(float4 value) { return value.r; } +template<> float channel_mix(float4 value) { return value.g; } +template<> float channel_mix(float4 value) { return value.b; } +template<> float channel_mix(float4 value) { return reduce_max(value.rgb); } +template<> float2 channel_mix(float4 value) { return value.rg; } +/* clang-format on */ +template<> float channel_mix(float4 value) +{ + float3 coefficients = push_constant_get(compositor_luminance_shared, luminance_coefficients); + return dot(value.rgb, coefficients); +} +template<> float channel_mix(float4 value) +{ + float3 coefficients = push_constant_get(compositor_luminance_shared, luminance_coefficients); + return log(max(dot(value.rgb, coefficients), 1e-5f)); +} + +/* clang-format off */ +template T load(float4 value) { return value; } +template float4 load(float4); +template<> float load(float4 value) { return value.x; } +template<> float2 load(float4 value) { return value.xy; } + +float4 to_float4(float value) { return float4(value); } +float4 to_float4(float2 value) { return value.xyyy; } +float4 to_float4(float4 value) { return value; } +/* clang-format on */ + +void load_shared_data(uint index, float &r_data) +{ + r_data = 
shared_variable_get(compositor_parallel_reduction_float_shared, reduction_data)[index]; +} +void load_shared_data(uint index, float2 &r_data) +{ + r_data = shared_variable_get(compositor_parallel_reduction_float2_shared, reduction_data)[index]; +} +void load_shared_data(uint index, float4 &r_data) +{ + r_data = shared_variable_get(compositor_parallel_reduction_float4_shared, reduction_data)[index]; +} + +void store_shared_data(uint index, float data) +{ + shared_variable_get(compositor_parallel_reduction_float_shared, reduction_data)[index] = data; +} +void store_shared_data(uint index, float2 data) +{ + shared_variable_get(compositor_parallel_reduction_float2_shared, reduction_data)[index] = data; +} +void store_shared_data(uint index, float4 data) +{ + shared_variable_get(compositor_parallel_reduction_float4_shared, reduction_data)[index] = data; +} + +template void reduction() { int2 texel = int2(gl_GlobalInvocationID.xy); @@ -65,7 +279,7 @@ void main() * not affect the output of the reduction. For instance, sum reductions have an identity of 0.0, * while max value reductions have an identity of FLT_MIN */ if (any(lessThan(texel, int2(0))) || any(greaterThanEqual(texel, texture_size(input_tx)))) { - reduction_data[gl_LocalInvocationIndex] = IDENTITY; + store_shared_data(gl_LocalInvocationIndex, Operation::identity()); } else { float4 value = texture_load_unbound(input_tx, texel); @@ -82,8 +296,9 @@ void main() * will be loaded directly and reduced without extra processing. So the developer is expected * to define the INITIALIZE and LOAD macros to be expressions that derive the needed value from * the loaded value for the initial reduction pass and latter ones respectively. */ - reduction_data[gl_LocalInvocationIndex] = is_initial_reduction ? INITIALIZE(value) : - LOAD(value); + T data = is_initial_reduction ? 
Operation::initialize(channel_mix(value)) : + load(value); + store_shared_data(gl_LocalInvocationIndex, data); } /* Reduce the reduction data by half on every iteration until only one element remains. See the @@ -101,8 +316,11 @@ void main() * lower index, as can be seen in the diagram above. The developer is expected to define the * REDUCE macro to be a commutative and associative binary operator suitable for parallel * reduction. */ - reduction_data[gl_LocalInvocationIndex] = REDUCE( - reduction_data[gl_LocalInvocationIndex], reduction_data[gl_LocalInvocationIndex + stride]); + T lhs, rhs; + load_shared_data(gl_LocalInvocationIndex, lhs); + load_shared_data(gl_LocalInvocationIndex + stride, rhs); + T result = Operation::reduce(lhs, rhs); + store_shared_data(gl_LocalInvocationIndex, result); } /* Finally, the result of the reduction is available as the first element in the reduction data, @@ -110,12 +328,117 @@ void main() * it. */ barrier(); if (gl_LocalInvocationIndex == 0) { - /* If no WRITE macro is provided, we assume the reduction type can be passed to the float4 - * constructor. If not, WRITE is expected to be defined to construct the output value. 
*/ -#if defined(WRITE) - imageStore(output_img, int2(gl_WorkGroupID.xy), WRITE(reduction_data[0])); -#else - imageStore(output_img, int2(gl_WorkGroupID.xy), float4(reduction_data[0])); -#endif + T data; + load_shared_data(0, data); + imageStore(output_img, int2(gl_WorkGroupID.xy), to_float4(data)); } } + +template void reduction, ChannelLuma>(); +template void reduction, ChannelR>(); + +template void reduction, ChannelLuma>(); +template void reduction, ChannelMax>(); +template void reduction, ChannelR>(); +template void reduction, ChannelRG>(); + +template void reduction, ChannelR>(); +template void reduction, ChannelG>(); +template void reduction, ChannelB>(); +template void reduction, ChannelLuma>(); +template void reduction, ChannelLogLuma>(); +template void reduction, ChannelRGBA>(); + +template void reduction, ChannelR>(); +template void reduction, ChannelG>(); +template void reduction, ChannelB>(); +template void reduction, ChannelLuma>(); + +template void reduction, ChannelR>(); + +template void reduction, ChannelR>(); + +template void reduction, ChannelRGBA>(); + +void reduce_sum_red() +{ + reduction, ChannelR>(); +} +void reduce_sum_green() +{ + reduction, ChannelG>(); +} +void reduce_sum_blue() +{ + reduction, ChannelB>(); +} +void reduce_sum_luminance() +{ + reduction, ChannelLuma>(); +} +void reduce_sum_log_luminance() +{ + reduction, ChannelLogLuma>(); +} +void reduce_sum_color() +{ + reduction, ChannelRGBA>(); +} + +void reduce_sum_red_squared_difference() +{ + reduction, ChannelR>(); +} +void reduce_sum_green_squared_difference() +{ + reduction, ChannelG>(); +} +void reduce_sum_blue_squared_difference() +{ + reduction, ChannelB>(); +} +void reduce_sum_luminance_squared_difference() +{ + reduction, ChannelLuma>(); +} + +void reduce_maximum_luminance() +{ + reduction, ChannelLuma>(); +} +void reduce_maximum_brightness() +{ + reduction, ChannelMax>(); +} +void reduce_maximum_float() +{ + reduction, ChannelR>(); +} +void reduce_maximum_float2() +{ + 
reduction, ChannelRG>(); +} + +void reduce_maximum_float_in_range() +{ + reduction, ChannelR>(); +} + +void reduce_minimum_luminance() +{ + reduction, ChannelLuma>(); +} +void reduce_minimum_float() +{ + reduction, ChannelR>(); +} + +void reduce_minimum_float_in_range() +{ + reduction, ChannelR>(); +} + +void reduce_max_velocity() +{ + reduction, ChannelRGBA>(); +} diff --git a/source/blender/compositor/shaders/infos/compositor_parallel_reduction_infos.hh b/source/blender/compositor/shaders/infos/compositor_parallel_reduction_infos.hh index 6e84c706e5d..3ea092a566d 100644 --- a/source/blender/compositor/shaders/infos/compositor_parallel_reduction_infos.hh +++ b/source/blender/compositor/shaders/infos/compositor_parallel_reduction_infos.hh @@ -2,6 +2,12 @@ * * SPDX-License-Identifier: GPL-2.0-or-later */ +#pragma once + +#ifdef GPU_SHADER +# include "gpu_shader_compat.hh" +#endif + #include "gpu_shader_create_info.hh" GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_shared) @@ -11,62 +17,89 @@ SAMPLER(0, sampler2D, input_tx) COMPUTE_SOURCE("compositor_parallel_reduction.glsl") GPU_SHADER_CREATE_END() +GPU_SHADER_CREATE_INFO(compositor_luminance_shared) +PUSH_CONSTANT(float3, luminance_coefficients) +GPU_SHADER_CREATE_END() + +GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_float_shared) +GROUP_SHARED(float, reduction_data[gl_WorkGroupSize.x * gl_WorkGroupSize.y]) +GPU_SHADER_CREATE_END() + +GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_float2_shared) +GROUP_SHARED(float2, reduction_data[gl_WorkGroupSize.x * gl_WorkGroupSize.y]) +GPU_SHADER_CREATE_END() + +GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_float4_shared) +GROUP_SHARED(float4, reduction_data[gl_WorkGroupSize.x * gl_WorkGroupSize.y]) +GPU_SHADER_CREATE_END() + +GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_half4_shared) +GROUP_SHARED(float4, reduction_data[gl_WorkGroupSize.x * gl_WorkGroupSize.y]) +GPU_SHADER_CREATE_END() + 
+GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_output_float) +IMAGE(0, SFLOAT_32, write, image2D, output_img) +GPU_SHADER_CREATE_END() + +GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_output_float2) +IMAGE(0, SFLOAT_32_32, write, image2D, output_img) +GPU_SHADER_CREATE_END() + +GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_output_float4) +IMAGE(0, SFLOAT_32_32_32_32, write, image2D, output_img) +GPU_SHADER_CREATE_END() + +GPU_SHADER_CREATE_INFO(compositor_parallel_reduction_output_half4) +IMAGE(0, SFLOAT_16_16_16_16, write, image2D, output_img) +GPU_SHADER_CREATE_END() + /* -------------------------------------------------------------------- * Sum Reductions. */ -GPU_SHADER_CREATE_INFO(compositor_sum_shared) -ADDITIONAL_INFO(compositor_parallel_reduction_shared) -DEFINE_VALUE("REDUCE(lhs, rhs)", "lhs + rhs") -GPU_SHADER_CREATE_END() - GPU_SHADER_CREATE_INFO(compositor_sum_float_shared) -ADDITIONAL_INFO(compositor_sum_shared) -IMAGE(0, SFLOAT_32, write, image2D, output_img) -DEFINE_VALUE("TYPE", "float") -DEFINE_VALUE("IDENTITY", "0.0f") -DEFINE_VALUE("LOAD(value)", "value.x") +ADDITIONAL_INFO(compositor_parallel_reduction_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_float_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float) GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_sum_red) +COMPUTE_FUNCTION("reduce_sum_red") ADDITIONAL_INFO(compositor_sum_float_shared) -DEFINE_VALUE("INITIALIZE(value)", "value.r") DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_sum_green) +COMPUTE_FUNCTION("reduce_sum_green") ADDITIONAL_INFO(compositor_sum_float_shared) -DEFINE_VALUE("INITIALIZE(value)", "value.g") DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_sum_blue) +COMPUTE_FUNCTION("reduce_sum_blue") ADDITIONAL_INFO(compositor_sum_float_shared) -DEFINE_VALUE("INITIALIZE(value)", "value.b") DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() 
GPU_SHADER_CREATE_INFO(compositor_sum_luminance) +COMPUTE_FUNCTION("reduce_sum_luminance") ADDITIONAL_INFO(compositor_sum_float_shared) -PUSH_CONSTANT(float3, luminance_coefficients) -DEFINE_VALUE("INITIALIZE(value)", "dot(value.rgb, luminance_coefficients)") +ADDITIONAL_INFO(compositor_luminance_shared) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_sum_log_luminance) +COMPUTE_FUNCTION("reduce_sum_log_luminance") ADDITIONAL_INFO(compositor_sum_float_shared) -PUSH_CONSTANT(float3, luminance_coefficients) -DEFINE_VALUE("INITIALIZE(value)", "log(max(dot(value.rgb, luminance_coefficients), 1e-5f))") +ADDITIONAL_INFO(compositor_luminance_shared) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_sum_color) -ADDITIONAL_INFO(compositor_sum_shared) -IMAGE(0, SFLOAT_32_32_32_32, write, image2D, output_img) -DEFINE_VALUE("TYPE", "vec4") -DEFINE_VALUE("IDENTITY", "vec4(0.0f)") -DEFINE_VALUE("INITIALIZE(value)", "value") -DEFINE_VALUE("LOAD(value)", "value") +COMPUTE_FUNCTION("reduce_sum_color") +ADDITIONAL_INFO(compositor_parallel_reduction_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float4) +GROUP_SHARED(float4, reduction_data[gl_WorkGroupSize.x * gl_WorkGroupSize.y]) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() @@ -76,36 +109,33 @@ GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_sum_squared_difference_float_shared) ADDITIONAL_INFO(compositor_parallel_reduction_shared) -IMAGE(0, SFLOAT_32, write, image2D, output_img) +ADDITIONAL_INFO(compositor_parallel_reduction_float_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float) PUSH_CONSTANT(float, subtrahend) -DEFINE_VALUE("TYPE", "float") -DEFINE_VALUE("IDENTITY", "0.0f") -DEFINE_VALUE("LOAD(value)", "value.x") -DEFINE_VALUE("REDUCE(lhs, rhs)", "lhs + rhs") GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_sum_red_squared_difference) +COMPUTE_FUNCTION("reduce_sum_red_squared_difference") 
ADDITIONAL_INFO(compositor_sum_squared_difference_float_shared) -DEFINE_VALUE("INITIALIZE(value)", "pow(value.r - subtrahend, 2.0f)") DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_sum_green_squared_difference) +COMPUTE_FUNCTION("reduce_sum_green_squared_difference") ADDITIONAL_INFO(compositor_sum_squared_difference_float_shared) -DEFINE_VALUE("INITIALIZE(value)", "pow(value.g - subtrahend, 2.0f)") DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_sum_blue_squared_difference) +COMPUTE_FUNCTION("reduce_sum_blue_squared_difference") ADDITIONAL_INFO(compositor_sum_squared_difference_float_shared) -DEFINE_VALUE("INITIALIZE(value)", "pow(value.b - subtrahend, 2.0f)") DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_sum_luminance_squared_difference) +COMPUTE_FUNCTION("reduce_sum_luminance_squared_difference") ADDITIONAL_INFO(compositor_sum_squared_difference_float_shared) -PUSH_CONSTANT(float3, luminance_coefficients) -DEFINE_VALUE("INITIALIZE(value)", "pow(dot(value.rgb, luminance_coefficients) - subtrahend, 2.0f)") +ADDITIONAL_INFO(compositor_luminance_shared) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() @@ -114,61 +144,45 @@ GPU_SHADER_CREATE_END() */ GPU_SHADER_CREATE_INFO(compositor_maximum_luminance) +COMPUTE_FUNCTION("reduce_maximum_luminance") ADDITIONAL_INFO(compositor_parallel_reduction_shared) -IMAGE(0, SFLOAT_32, write, image2D, output_img) -PUSH_CONSTANT(float3, luminance_coefficients) -DEFINE_VALUE("TYPE", "float") -DEFINE_VALUE("IDENTITY", "FLT_MIN") -DEFINE_VALUE("INITIALIZE(value)", "dot(value.rgb, luminance_coefficients)") -DEFINE_VALUE("LOAD(value)", "value.x") -DEFINE_VALUE("REDUCE(lhs, rhs)", "max(lhs, rhs)") +ADDITIONAL_INFO(compositor_parallel_reduction_float_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float) +ADDITIONAL_INFO(compositor_luminance_shared) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() 
GPU_SHADER_CREATE_INFO(compositor_maximum_brightness) +COMPUTE_FUNCTION("reduce_maximum_brightness") ADDITIONAL_INFO(compositor_parallel_reduction_shared) -IMAGE(0, SFLOAT_32, write, image2D, output_img) -DEFINE_VALUE("TYPE", "float") -DEFINE_VALUE("IDENTITY", "FLT_MIN") -DEFINE_VALUE("INITIALIZE(value)", "reduce_max(value.rgb)") -DEFINE_VALUE("LOAD(value)", "value.x") -DEFINE_VALUE("REDUCE(lhs, rhs)", "max(lhs, rhs)") +ADDITIONAL_INFO(compositor_parallel_reduction_float_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_maximum_float) +COMPUTE_FUNCTION("reduce_maximum_float") ADDITIONAL_INFO(compositor_parallel_reduction_shared) -IMAGE(0, SFLOAT_32, write, image2D, output_img) -DEFINE_VALUE("TYPE", "float") -DEFINE_VALUE("IDENTITY", "FLT_MIN") -DEFINE_VALUE("INITIALIZE(value)", "value.x") -DEFINE_VALUE("LOAD(value)", "value.x") -DEFINE_VALUE("REDUCE(lhs, rhs)", "max(rhs, lhs)") +ADDITIONAL_INFO(compositor_parallel_reduction_float_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_maximum_float2) +COMPUTE_FUNCTION("reduce_maximum_float2") ADDITIONAL_INFO(compositor_parallel_reduction_shared) -IMAGE(0, SFLOAT_32_32, write, image2D, output_img) -DEFINE_VALUE("TYPE", "vec2") -DEFINE_VALUE("IDENTITY", "vec2(FLT_MIN)") -DEFINE_VALUE("INITIALIZE(value)", "value.xy") -DEFINE_VALUE("LOAD(value)", "value.xy") -DEFINE_VALUE("REDUCE(lhs, rhs)", "max(rhs, lhs)") -DEFINE_VALUE("WRITE(value)", "vec4(value, vec2(0.0f))") +ADDITIONAL_INFO(compositor_parallel_reduction_float2_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float2) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_maximum_float_in_range) +COMPUTE_FUNCTION("reduce_maximum_float_in_range") ADDITIONAL_INFO(compositor_parallel_reduction_shared) -IMAGE(0, SFLOAT_32, write, 
image2D, output_img) +ADDITIONAL_INFO(compositor_parallel_reduction_float_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float) PUSH_CONSTANT(float, lower_bound) PUSH_CONSTANT(float, upper_bound) -DEFINE_VALUE("TYPE", "float") -DEFINE_VALUE("IDENTITY", "lower_bound") -DEFINE_VALUE("INITIALIZE(v)", "((v.x <= upper_bound) && (v.x >= lower_bound)) ? v.x : lower_bound") -DEFINE_VALUE("LOAD(value)", "value.x") -DEFINE_VALUE("REDUCE(lhs, rhs)", "((rhs > lhs) && (rhs <= upper_bound)) ? rhs : lhs") DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() @@ -177,38 +191,29 @@ GPU_SHADER_CREATE_END() */ GPU_SHADER_CREATE_INFO(compositor_minimum_luminance) +COMPUTE_FUNCTION("reduce_minimum_luminance") ADDITIONAL_INFO(compositor_parallel_reduction_shared) -IMAGE(0, SFLOAT_32, write, image2D, output_img) -PUSH_CONSTANT(float3, luminance_coefficients) -DEFINE_VALUE("TYPE", "float") -DEFINE_VALUE("IDENTITY", "FLT_MAX") -DEFINE_VALUE("INITIALIZE(value)", "dot(value.rgb, luminance_coefficients)") -DEFINE_VALUE("LOAD(value)", "value.x") -DEFINE_VALUE("REDUCE(lhs, rhs)", "min(lhs, rhs)") +ADDITIONAL_INFO(compositor_parallel_reduction_float_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float) +ADDITIONAL_INFO(compositor_luminance_shared) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_minimum_float) +COMPUTE_FUNCTION("reduce_minimum_float") ADDITIONAL_INFO(compositor_parallel_reduction_shared) -IMAGE(0, SFLOAT_32, write, image2D, output_img) -DEFINE_VALUE("TYPE", "float") -DEFINE_VALUE("IDENTITY", "FLT_MAX") -DEFINE_VALUE("INITIALIZE(value)", "value.x") -DEFINE_VALUE("LOAD(value)", "value.x") -DEFINE_VALUE("REDUCE(lhs, rhs)", "min(rhs, lhs)") +ADDITIONAL_INFO(compositor_parallel_reduction_float_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float) DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() GPU_SHADER_CREATE_INFO(compositor_minimum_float_in_range) +COMPUTE_FUNCTION("reduce_minimum_float_in_range") 
ADDITIONAL_INFO(compositor_parallel_reduction_shared) -IMAGE(0, SFLOAT_32, write, image2D, output_img) +ADDITIONAL_INFO(compositor_parallel_reduction_float_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_float) PUSH_CONSTANT(float, lower_bound) PUSH_CONSTANT(float, upper_bound) -DEFINE_VALUE("TYPE", "float") -DEFINE_VALUE("IDENTITY", "upper_bound") -DEFINE_VALUE("INITIALIZE(v)", "((v.x <= upper_bound) && (v.x >= lower_bound)) ? v.x : upper_bound") -DEFINE_VALUE("LOAD(value)", "value.x") -DEFINE_VALUE("REDUCE(lhs, rhs)", "((rhs < lhs) && (rhs >= lower_bound)) ? rhs : lhs") DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END() @@ -217,17 +222,12 @@ GPU_SHADER_CREATE_END() */ GPU_SHADER_CREATE_INFO(compositor_max_velocity) +ADDITIONAL_INFO(compositor_parallel_reduction_float4_shared) +ADDITIONAL_INFO(compositor_parallel_reduction_output_half4) +COMPUTE_FUNCTION("reduce_max_velocity") LOCAL_GROUP_SIZE(32, 32) PUSH_CONSTANT(bool, is_initial_reduction) SAMPLER(0, sampler2D, input_tx) -IMAGE(0, SFLOAT_16_16_16_16, write, image2D, output_img) -DEFINE_VALUE("TYPE", "vec4") -DEFINE_VALUE("IDENTITY", "vec4(0.0f)") -DEFINE_VALUE("INITIALIZE(value)", "value") -DEFINE_VALUE("LOAD(value)", "value") -DEFINE_VALUE("REDUCE(lhs, rhs)", - "vec4(dot(lhs.xy, lhs.xy) > dot(rhs.xy, rhs.xy) ? lhs.xy : rhs.xy," - " dot(lhs.zw, lhs.zw) > dot(rhs.zw, rhs.zw) ? lhs.zw : rhs.zw)") COMPUTE_SOURCE("compositor_parallel_reduction.glsl") DO_STATIC_COMPILATION() GPU_SHADER_CREATE_END()