The Blender codebase had two ways to convert half (FP16) to float (FP32):
- BLI_math_bits.h half_to_float. Out of 64k possible half values, it converts
4096 of them incorrectly. Mostly denormals and NaNs, which is perhaps not too
relevant. But more importantly, it converts half zero to float 0.000030517578
which does not sound ideal.
- Functions in Vulkan vk_data_conversion.hh. This one converts 2046 possible
half values incorrectly.
The function to convert float (FP32) to half (FP16) was in Vulkan
vk_data_conversion.hh, and it converted a number of possible inputs incorrectly,
presumably because it did not do the proper "round to nearest even" that CPU/GPU hardware does.
This PR:
- Adds BLI_math_half.hh with float_to_half and half_to_float functions.
- Documentation and test coverage.
- When compiling on ARM NEON, use hardware VCVT instructions.
- Removes the incorrect half_to_float from BLI_math_bits.h and replaces single
usage of it in View3D color picking to use the new function.
- Changes Vulkan FP32<->FP16 conversion code to use the new functions, to fix
correctness issues (makes eevee_next_bsdf_vulkan test pass). This makes it
faster too.
Pull Request: https://projects.blender.org/blender/blender/pulls/127708
120 lines
3.3 KiB
C++
120 lines
3.3 KiB
C++
/* SPDX-FileCopyrightText: 2024 Blender Authors
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later */
|
|
|
|
/** \file
|
|
* \ingroup bli
|
|
*/
|
|
|
|
#include "BLI_math_half.hh"
|
|
|
|
/* When compiling with ARM NEON enabled, define the switch that both conversion
 * functions in this file test to select their hardware code path, and include
 * the NEON intrinsics header those paths use.
 * NOTE(review): this assumes every `__ARM_NEON` target also provides the
 * FP16 <-> FP32 conversion intrinsics -- confirm if 32-bit ARM is ever built. */
#if defined(__ARM_NEON)
|
|
# define USE_HARDWARE_FP16_NEON /* Use ARM FP16 conversion instructions */
|
|
# include <arm_neon.h>
|
|
#endif
|
|
|
|
/**
 * Convert a 32-bit float to the bit pattern of a 16-bit half float.
 *
 * \param v: The FP32 value to convert.
 * \return The FP16 result as raw bits (sign in bit 15, 5 exponent bits, 10 mantissa bits).
 *
 * When ARM NEON is available the hardware conversion instruction is used. Otherwise a
 * scalar bit-manipulation path is used, which:
 * - rounds to nearest, ties to even;
 * - maps magnitudes of 65536 and above (including infinity) to half infinity;
 * - maps any NaN to the quiet NaN pattern 0x7e00;
 * - produces half subnormals/zero for magnitudes below 2^-14;
 * - copies the sign bit of the input to the result in all cases.
 */
uint16_t blender::math::float_to_half(float v)
{
#if defined(USE_HARDWARE_FP16_NEON)
  /* Broadcast the input to all lanes, convert, then read lane 0 as raw bits.
   * The vector is reinterpreted as 16-bit integers instead of reading a `float16_t`
   * through a `uint16_t` pointer, which would violate strict aliasing rules. */
  const float16x4_t h4 = vcvt_f16_f32(vdupq_n_f32(v));
  return vget_lane_u16(vreinterpret_u16_f16(h4), 0);
#else
  /* Based on float_to_half_fast3_rtne from public domain https://gist.github.com/rygorous/2156668
   * see corresponding blog post https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
   */
  union FP32 {
    uint32_t u;
    float f;
  };
  FP32 f;
  f.f = v;

  /* Bit pattern of FP32 infinity (all exponent bits set, zero mantissa). */
  const FP32 f32infty = {255u << 23};
  /* 2^16: the smallest FP32 magnitude that no longer fits the finite FP16 range. */
  const FP32 f16max = {(127u + 16u) << 23};
  /* 2^-14: the smallest normal FP16 magnitude, expressed as FP32 bits. */
  const uint32_t f16_min_normal = 113u << 23;
  /* 0.5f: adding it places the 10 half mantissa bits at the bottom of the float. */
  const FP32 denorm_magic = {((127u - 15u) + (23u - 10u) + 1u) << 23};
  const uint32_t sign_mask = 0x80000000u;
  uint16_t o = 0;

  /* Split off the sign; everything below works on the absolute value. */
  const uint32_t sign = f.u & sign_mask;
  f.u ^= sign;

  /*
   * NOTE all the integer compares in this function can be safely
   * compiled into signed compares since all operands are below
   * 0x80000000. Important if you want fast straight SSE2 code
   * (since there's no unsigned PCMPGTD).
   */
  if (f.u >= f16max.u) {
    /* Result is Inf or NaN (all exponent bits set). */
    o = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; /* NaN->qNaN and Inf->Inf */
  }
  else {
    /* (De)normalized number or zero. */
    if (f.u < f16_min_normal) {
      /* Resulting FP16 is subnormal or zero.
       * Use a magic value to align our 10 mantissa bits at the bottom of
       * the float. As long as FP addition is round-to-nearest-even this
       * just works. */
      f.f += denorm_magic.f;

      /* And one integer subtract of the bias later, we have our final half! */
      o = static_cast<uint16_t>(f.u - denorm_magic.u);
    }
    else {
      /* Whether the mantissa that survives truncation is odd (needed for ties-to-even). */
      const uint32_t mant_odd = (f.u >> 13) & 1;

      /* Update exponent (re-bias from 127 to 15, wrapping modulo 2^32), rounding bias part 1. */
      f.u += (static_cast<uint32_t>(15 - 127) << 23) + 0xfff;
      /* Rounding bias part 2. */
      f.u += mant_odd;
      /* Take the bits! */
      o = static_cast<uint16_t>(f.u >> 13);
    }
  }

  o |= static_cast<uint16_t>(sign >> 16);
  return o;
#endif
}
|
|
|
|
/**
 * Convert the bit pattern of a 16-bit half float to a 32-bit float.
 *
 * \param v: The FP16 value as raw bits (sign in bit 15, 5 exponent bits, 10 mantissa bits).
 * \return The corresponding FP32 value.
 *
 * When ARM NEON is available the hardware conversion instruction is used. Otherwise a
 * scalar bit-manipulation path is used, which handles zero, subnormals, infinity and NaN
 * explicitly (the NaN mantissa bits are carried over, shifted into the wider mantissa)
 * and copies the sign bit of the input to the result.
 */
float blender::math::half_to_float(uint16_t v)
{
#if defined(USE_HARDWARE_FP16_NEON)
  /* Broadcast the bits to all lanes, view them as halves, convert, and read lane 0. */
  const uint16x4_t v4 = vdup_n_u16(v);
  const float16x4_t h4 = vreinterpret_f16_u16(v4);
  const float32x4_t f4 = vcvt_f32_f16(h4);
  return vgetq_lane_f32(f4, 0);
#else
  /* Based on half_to_float_fast4 from public domain https://gist.github.com/rygorous/2144712
   * see corresponding blog post https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
   */
  union FP32 {
    uint32_t u;
    float f;
  };
  /* 2^-14 as FP32 bits: the value subtracted to renormalize half subnormals. */
  constexpr FP32 magic = {113u << 23};
  /* Half exponent mask after it has been shifted into FP32 position. */
  constexpr uint32_t shifted_exp = 0x7c00u << 13;
  FP32 o;

  /* All shifts are done on 32-bit unsigned values, so that nothing is shifted into
   * the sign bit of an `int`-promoted operand. */
  const uint32_t bits = v;

  o.u = (bits & 0x7fffu) << 13;                /* Exponent/mantissa bits. */
  const uint32_t exp_bits = shifted_exp & o.u; /* Just the exponent. */
  o.u += (127u - 15u) << 23;                   /* Exponent adjust (re-bias 15 -> 127). */

  /* Handle exponent special cases. */
  if (exp_bits == shifted_exp) {
    /* Inf/NaN: push the exponent up to all ones. */
    o.u += (128u - 16u) << 23;
  }
  else if (exp_bits == 0) {
    /* Zero/Denormal: form 2^-14 * (1 + mantissa / 1024), then subtract 2^-14 so that
     * only the subnormal magnitude remains (exactly zero for a zero mantissa). */
    o.u += 1u << 23;
    o.f -= magic.f;
  }

  /* Sign bit goes in last, so that it is also applied to zero results. */
  o.u |= (bits & 0x8000u) << 16;
  return o.f;
#endif
}
|
|
|
|
/* Both conversion functions have been defined by this point; remove the switch
 * macro so it does not stay defined for any code that follows. */
#ifdef USE_HARDWARE_FP16_NEON
|
|
# undef USE_HARDWARE_FP16_NEON
|
|
#endif
|