From a94b8ade2067437dc4558475996b9718f401d240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Foucault?= Date: Thu, 30 May 2024 19:59:18 +0200 Subject: [PATCH] GPU: Add library for handling shared exponent format in software This allows reducing bandwidth at the cost of some instructions for packing and decoding the texture. Pull Request: https://projects.blender.org/blender/blender/pulls/122446 --- source/blender/gpu/CMakeLists.txt | 1 + .../gpu_shader_shared_exponent_lib.glsl | 78 +++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 source/blender/gpu/shaders/common/gpu_shader_shared_exponent_lib.glsl diff --git a/source/blender/gpu/CMakeLists.txt b/source/blender/gpu/CMakeLists.txt index c54bb6435df..b0f3d8379a8 100644 --- a/source/blender/gpu/CMakeLists.txt +++ b/source/blender/gpu/CMakeLists.txt @@ -519,6 +519,7 @@ set(GLSL_SRC shaders/common/gpu_shader_math_matrix_lib.glsl shaders/common/gpu_shader_math_rotation_lib.glsl shaders/common/gpu_shader_math_vector_lib.glsl + shaders/common/gpu_shader_shared_exponent_lib.glsl shaders/common/gpu_shader_smaa_lib.glsl shaders/common/gpu_shader_test_lib.glsl shaders/common/gpu_shader_utildefines_lib.glsl diff --git a/source/blender/gpu/shaders/common/gpu_shader_shared_exponent_lib.glsl b/source/blender/gpu/shaders/common/gpu_shader_shared_exponent_lib.glsl new file mode 100644 index 00000000000..05d529faa83 --- /dev/null +++ b/source/blender/gpu/shaders/common/gpu_shader_shared_exponent_lib.glsl @@ -0,0 +1,78 @@ +/* SPDX-FileCopyrightText: 2024 Blender Authors + * + * SPDX-License-Identifier: GPL-2.0-or-later */ + +/** + * Software implementation of encoding and decoding of shared exponent texture as described by the + * OpenGL extension EXT_texture_shared_exponent Appendix + * https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt + * + * This allows reading and writing the RGB9_E5 format in an R32UI texture without explicit support on + * the hardware for this type. 
However, filtering is not supported in this case.
+ */
+/* Bit layout of a packed texel as used by the functions below: three 9-bit mantissas in the
+ * low 27 bits (R lowest, then G, then B) and a 5-bit biased exponent in the top 5 bits. */
+#define RGB9E5_EXPONENT_BITS 5
+#define RGB9E5_MANTISSA_BITS 9
+#define RGB9E5_EXP_BIAS 15
+#define RGB9E5_MAX_VALID_BIASED_EXP 31
+/* Derived values: these evaluate to 16, 512 and 511 respectively. */
+#define MAX_RGB9E5_EXP (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS)
+#define RGB9E5_MANTISSA_VALUES (1 << RGB9E5_MANTISSA_BITS)
+#define MAX_RGB9E5_MANTISSA (RGB9E5_MANTISSA_VALUES - 1)
+/**
+ * Return the biased exponent field of the 32-bit float encoding of `x`, minus the bias of 127.
+ * For positive normalized `x` this is `floor(log2(x))`.
+ *
+ * The sign bit is not masked out, so the result is only meaningful for non-negative input. The
+ * caller in this file clamps its input to the [0, 0xFF80] range before calling.
+ */
+int rgb9e5_floor_log2(float x)
+{
+  /* Ok, rgb9e5_floor_log2 is not correct for the denorm and zero values, but we
+   * are going to do a max of this value with the minimum rgb9e5 exponent
+   * that will hide these problem cases. */
+  int biased_exponent = floatBitsToInt(x) >> 23;
+  return biased_exponent - 127;
+}
+/**
+ * Return the scale that turns a mantissa into a float value for the given biased exponent:
+ * `2^(exponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS)`, which is `2^(exponent - 24)`.
+ */
+float rgb9e5_exponent_factor(int exponent)
+{
+  /* This pow function could be replaced by a table. There are only 32 values. */
+  return exp2(float(exponent - RGB9E5_EXP_BIAS - RGB9E5_MANTISSA_BITS));
+}
+/** Unpacked form of a texel: one exponent shared by the three channels plus their mantissas. */
+struct rgb9e5_t {
+  uint exp_shared; /* Biased shared exponent as produced by `rgb9e5_from_float3`. */
+  uvec3 mantissa;  /* Per-channel mantissas (R, G, B). */
+};
+/**
+ * Convert a float color to an exponent shared by all channels and one mantissa per channel.
+ * Each channel is first clamped to the [0, 0xFF80] range (0xFF80 is 65408).
+ * The result is not packed into a single integer; `rgb9e5_encode` does the packing.
+ */
+rgb9e5_t rgb9e5_from_float3(vec3 color)
+{
+  const float max_rgb9e5 = float(0xFF80u); /* 65408 = 511 * 2^(31 - 24). */
+  color = clamp(color, 0.0, max_rgb9e5);
+  /* Derive the shared exponent from the largest channel. The `max()` with the minimum exponent
+   * also covers the zero and denormal inputs that `rgb9e5_floor_log2` gets wrong. If rounding
+   * the largest channel yields 512 (one past the largest mantissa), the exponent is bumped by
+   * one and the scale is doubled so that the mantissa fits again. */
+  float max_component = max(max(color.r, color.g), color.b);
+  int log2_floored = rgb9e5_floor_log2(max_component);
+  int exp_shared = max(-RGB9E5_EXP_BIAS - 1, log2_floored) + (1 + RGB9E5_EXP_BIAS);
+  float denom = rgb9e5_exponent_factor(exp_shared);
+  int maxm = int(max_component / denom + 0.5);
+  if (maxm == MAX_RGB9E5_MANTISSA + 1) {
+    denom *= 2.0;
+    exp_shared += 1;
+  }
+  /* Quantize all channels with the final scale; 0.5 is added for rounding before the integer
+   * conversion. */
+  rgb9e5_t result;
+  result.exp_shared = uint(exp_shared);
+  result.mantissa = uvec3(color / denom + 0.5);
+  return result;
+}
+/**
+ * Pack a float color into a single `uint`: R, G and B mantissas at bits 0, 9 and 18, shared
+ * exponent at bit 27. The color is converted with `rgb9e5_from_float3`, which clamps it.
+ */
+uint rgb9e5_encode(vec3 color)
+{
+  rgb9e5_t result = rgb9e5_from_float3(color);
+  result.exp_shared <<= RGB9E5_MANTISSA_BITS * 3; /* Fields are shifted in place, then merged. */
+  result.mantissa <<= RGB9E5_MANTISSA_BITS * uvec3(0, 1, 2);
+  return result.mantissa.r | result.mantissa.g | result.mantissa.b | result.exp_shared;
+}
+/**
+ * Unpack a `uint` written by `rgb9e5_encode` back into a float color: each 9-bit mantissa is
+ * multiplied by the factor of the shared exponent stored in the top 5 bits.
+ *
+ * NOTE(review): the final multiply relies on an implicit `uvec3` to `vec3` conversion. Confirm
+ * that every targeted shader backend accepts it.
+ */
+vec3 rgb9e5_decode(uint data)
+{
+  int exp_shared = int(data >> (RGB9E5_MANTISSA_BITS * 3));
+  uvec3 mantissa = (uvec3(data) >> (RGB9E5_MANTISSA_BITS * uvec3(0, 1, 2))) &
+                   uint(MAX_RGB9E5_MANTISSA);
+  return mantissa * rgb9e5_exponent_factor(exp_shared);
+}