BLI: add float<->half conversion functions with correct math, use in Vulkan

Blender codebase had two ways to convert half (FP16) to float (FP32): - BLI_math_bits.h half_to_float. Out of 64k possible half values, it converts 4096 of them incorrectly. Mostly denormals and NaNs, which is perhaps not too relevant. But more importantly, it converts half zero to float 0.000030517578 which does not sound ideal. - Functions in Vulkan vk_data_conversion.hh. This one converts 2046 possible half values incorrectly. Function to convert float (FP32) to half (FP16) was in Vulkan vk_data_conversion.hh, and it got a bunch of possible inputs wrong. I guess it did not do proper "round to nearest even" that CPU/GPU hardware does. This PR: - Adds BLI_math_half.hh with float_to_half and half_to_float functions. - Documentation and test coverage. - When compiling on ARM NEON, use hardware VCVT instructions. - Removes the incorrect half_to_float from BLI_math_bits.h and replaces single usage of it in View3D color picking to use the new function. - Changes Vulkan FP32<->FP16 conversion code to use the new functions, to fix correctness issues (makes eevee_next_bsdf_vulkan test pass). This makes it faster too. Pull Request: https://projects.blender.org/blender/blender/pulls/127708
2024-09-18 13:15:00 +02:00
parent 5fe32f351a
commit 92544d6d76
8 changed files with 310 additions and 24 deletions
--- a/source/blender/blenlib/BLI_math_bits.h
+++ b/source/blender/blenlib/BLI_math_bits.h
@@ -60,8 +60,6 @@ MINLINE float int_as_float(int i);
 MINLINE float uint_as_float(unsigned int i);
 MINLINE float xor_fl(float x, int y);

-MINLINE float half_to_float(ushort h);
-
 #if BLI_MATH_DO_INLINE
 #  include "intern/math_bits_inline.c"
 #endif
--- a/source/blender/blenlib/BLI_math_half.hh
+++ b/source/blender/blenlib/BLI_math_half.hh
@@ -0,0 +1,37 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+#pragma once
+
+/** \file
+ * \ingroup bli
+ */
+
+#include <cstdint>
+
+namespace blender::math {
+
+/**
+ * Float (FP32) <-> Half (FP16) conversion functions.
+ *
+ * Behavior matches hardware (x64 F16C, ARM NEON fcvt),
+ * including handling of denormals, infinities, NaNs, rounding
+ * is to nearest even, etc. When NaNs are produced, the exact
+ * bit pattern might not match hardware, but it will still be a NaN.
+ *
+ * When compiling for ARM NEON (e.g. Apple Silicon),
+ * hardware VCVT instructions are used.
+ */
+
+/**
+ * Converts float (FP32) number to half-precision (FP16).
+ */
+uint16_t float_to_half(float v);
+
+/**
+ * Converts half-precision (FP16) number to float (FP32).
+ */
+float half_to_float(uint16_t v);
+
+}  // namespace blender::math
--- a/source/blender/blenlib/CMakeLists.txt
+++ b/source/blender/blenlib/CMakeLists.txt
@@ -106,6 +106,7 @@ set(SRC
  intern/math_color_inline.c
  intern/math_geom.cc
  intern/math_geom_inline.c
+  intern/math_half.cc
  intern/math_interp.cc
  intern/math_matrix.cc
  intern/math_matrix_c.cc
@@ -303,6 +304,7 @@ set(SRC
  BLI_math_euler.hh
  BLI_math_euler_types.hh
  BLI_math_geom.h
+  BLI_math_half.hh
  BLI_math_inline.h
  BLI_math_interp.hh
  BLI_math_matrix.h
@@ -562,6 +564,7 @@ if(WITH_GTESTS)
    tests/BLI_math_bits_test.cc
    tests/BLI_math_color_test.cc
    tests/BLI_math_geom_test.cc
+    tests/BLI_math_half_test.cc
    tests/BLI_math_interp_test.cc
    tests/BLI_math_matrix_test.cc
    tests/BLI_math_matrix_types_test.cc
--- a/source/blender/blenlib/intern/math_bits_inline.c
+++ b/source/blender/blenlib/intern/math_bits_inline.c
@@ -173,13 +173,4 @@ MINLINE float xor_fl(float x, int y)
  return int_as_float(float_as_int(x) ^ y);
 }

-MINLINE float half_to_float(ushort h)
-{
-  const uint sign = (h & 0x8000);
-  const uint exponent = (h & 0x7c00) + 0x1C000;
-  const uint mantissa = (h & 0x03FF);
-  const uint x = (sign << 16) | (exponent << 13) | (mantissa << 13);
-  return uint_as_float(x);
-}
-
 #endif /* __MATH_BITS_INLINE_C__ */
--- a/source/blender/blenlib/intern/math_half.cc
+++ b/source/blender/blenlib/intern/math_half.cc
@@ -0,0 +1,119 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later */
+
+/** \file
+ * \ingroup bli
+ */
+
+#include "BLI_math_half.hh"
+
+#if defined(__ARM_NEON)
+#  define USE_HARDWARE_FP16_NEON /* Use ARM FP16 conversion instructions */
+#  include <arm_neon.h>
+#endif
+
+uint16_t blender::math::float_to_half(float v)
+{
+#if defined(USE_HARDWARE_FP16_NEON)
+  float16x4_t h4 = vcvt_f16_f32(vdupq_n_f32(v));
+  float16_t h = vget_lane_f16(h4, 0);
+  return *(uint16_t *)&h;
+#else
+  /* Based on float_to_half_fast3_rtne from public domain https://gist.github.com/rygorous/2156668
+   * see corresponding blog post https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
+   */
+  union FP32 {
+    uint32_t u;
+    float f;
+  };
+  FP32 f;
+  f.f = v;
+  FP32 f32infty = {255 << 23};
+  FP32 f16max = {(127 + 16) << 23};
+  FP32 denorm_magic = {((127 - 15) + (23 - 10) + 1) << 23};
+  uint32_t sign_mask = 0x80000000u;
+  uint16_t o = {0};
+
+  uint32_t sign = f.u & sign_mask;
+  f.u ^= sign;
+
+  /*
+   * NOTE all the integer compares in this function can be safely
+   * compiled into signed compares since all operands are below
+   * 0x80000000. Important if you want fast straight SSE2 code
+   * (since there's no unsigned PCMPGTD).
+   */
+  if (f.u >= f16max.u) {
+    /* result is Inf or NaN (all exponent bits set) */
+    o = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; /* NaN->qNaN and Inf->Inf */
+  }
+  else {
+    /* (De)normalized number or zero */
+    if (f.u < (113 << 23)) {
+      /* Resulting FP16 is subnormal or zero.
+       * Use a magic value to align our 10 mantissa bits at the bottom of
+       * the float. as long as FP addition is round-to-nearest-even this
+       * just works. */
+      f.f += denorm_magic.f;
+
+      /* and one integer subtract of the bias later, we have our final float! */
+      o = f.u - denorm_magic.u;
+    }
+    else {
+      uint32_t mant_odd = (f.u >> 13) & 1; /* resulting mantissa is odd */
+
+      /* update exponent, rounding bias part 1 */
+      f.u += ((uint32_t)(15 - 127) << 23) + 0xfff;
+      /* rounding bias part 2 */
+      f.u += mant_odd;
+      /* take the bits! */
+      o = f.u >> 13;
+    }
+  }
+
+  o |= sign >> 16;
+  return o;
+#endif
+}
+
+float blender::math::half_to_float(uint16_t v)
+{
+#if defined(USE_HARDWARE_FP16_NEON)
+  uint16x4_t v4 = vdup_n_u16(v);
+  float16x4_t h4 = vreinterpret_f16_u16(v4);
+  float32x4_t f4 = vcvt_f32_f16(h4);
+  return vgetq_lane_f32(f4, 0);
+#else
+  /* Based on half_to_float_fast4 from public domain https://gist.github.com/rygorous/2144712
+   * see corresponding blog post https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
+   */
+  union FP32 {
+    uint32_t u;
+    float f;
+  };
+  constexpr FP32 magic = {113 << 23};
+  constexpr uint32_t shifted_exp = 0x7c00 << 13; /* exponent mask after shift */
+  FP32 o;
+
+  o.u = (v & 0x7fff) << 13;         /* exponent/mantissa bits */
+  uint32_t exp = shifted_exp & o.u; /* just the exponent */
+  o.u += (127 - 15) << 23;          /* exponent adjust */
+
+  /* handle exponent special cases */
+  if (exp == shifted_exp) {  /* Inf/NaN? */
+    o.u += (128 - 16) << 23; /* extra exp adjust */
+  }
+  else if (exp == 0) { /* Zero/Denormal? */
+    o.u += 1 << 23;    /* extra exp adjust */
+    o.f -= magic.f;    /* renormalize */
+  }
+
+  o.u |= (v & 0x8000) << 16; /* sign bit */
+  return o.f;
+#endif
+}
+
+#ifdef USE_HARDWARE_FP16_NEON
+#  undef USE_HARDWARE_FP16_NEON
+#endif
--- a/source/blender/blenlib/tests/BLI_math_half_test.cc
+++ b/source/blender/blenlib/tests/BLI_math_half_test.cc
@@ -0,0 +1,136 @@
+/* SPDX-FileCopyrightText: 2024 Blender Authors
+ *
+ * SPDX-License-Identifier: Apache-2.0 */
+
+#include "testing/testing.h"
+
+#include "BLI_math_half.hh"
+#include "BLI_time.h"
+
+#include <cmath>
+
+// #define DO_PERF_TESTS 1
+
+namespace blender::tests {
+
+TEST(math_half, half_to_float_scalar)
+{
+  EXPECT_EQ(blender::math::half_to_float(0), 0.0f);
+  EXPECT_EQ(blender::math::half_to_float(1), 5.960464478e-08f);
+  EXPECT_EQ(blender::math::half_to_float(32), 1.907348633e-06f);
+  EXPECT_EQ(blender::math::half_to_float(37), 2.205371857e-06f);
+  EXPECT_EQ(blender::math::half_to_float(511), 3.045797348e-05f);
+  EXPECT_EQ(blender::math::half_to_float(999), 5.954504013e-05f);
+  EXPECT_EQ(blender::math::half_to_float(1024), 6.103515625e-05f);
+  EXPECT_EQ(blender::math::half_to_float(1357), 8.088350296e-05f);
+  EXPECT_EQ(blender::math::half_to_float(6789), 0.003183364868f);
+  EXPECT_EQ(blender::math::half_to_float(16383), 1.999023438f);
+  EXPECT_EQ(blender::math::half_to_float(16384), 2.0f);
+  EXPECT_EQ(blender::math::half_to_float(31743), 65504.0f);
+  EXPECT_EQ(blender::math::half_to_float(31744), std::numeric_limits<float>::infinity());
+  EXPECT_TRUE(std::isnan(blender::math::half_to_float(31746)));
+  EXPECT_TRUE(std::isnan(blender::math::half_to_float(32767)));
+  EXPECT_EQ(blender::math::half_to_float(32768), -0.0f);
+  EXPECT_EQ(blender::math::half_to_float(32769), -5.960464478e-08f);
+  EXPECT_EQ(blender::math::half_to_float(46765), -0.4172363281f);
+  EXPECT_EQ(blender::math::half_to_float(54501), -78.3125f);
+  EXPECT_EQ(blender::math::half_to_float(64511), -65504.0f);
+  EXPECT_EQ(blender::math::half_to_float(64512), -std::numeric_limits<float>::infinity());
+  EXPECT_TRUE(std::isnan(blender::math::half_to_float(64513)));
+  EXPECT_TRUE(std::isnan(blender::math::half_to_float(65535)));
+}
+
+TEST(math_half, float_to_half_scalar)
+{
+#define HFUN(v) blender::math::float_to_half(v)
+  EXPECT_EQ(HFUN(0.0f), 0);
+  EXPECT_EQ(HFUN(std::numeric_limits<float>::min()), 0);
+  EXPECT_EQ(HFUN(5.960464478e-08f), 1);
+  EXPECT_EQ(HFUN(1.907348633e-06f), 32);
+  EXPECT_EQ(HFUN(2.205371857e-06f), 37);
+  EXPECT_EQ(HFUN(3.045797348e-05f), 511);
+  EXPECT_EQ(HFUN(5.954504013e-05f), 999);
+  EXPECT_EQ(HFUN(6.103515625e-05f), 1024);
+  EXPECT_EQ(HFUN(8.088350296e-05f), 1357);
+  EXPECT_EQ(HFUN(0.003183364868f), 6789);
+  EXPECT_EQ(HFUN(0.1f), 11878);
+  EXPECT_EQ(HFUN(1.0f), 15360);
+  EXPECT_EQ(HFUN(1.999023438f), 16383);
+  EXPECT_EQ(HFUN(1.999523438f), 16384);
+  EXPECT_EQ(HFUN(2.0f), 16384);
+  EXPECT_EQ(HFUN(11.0f), 18816);
+  EXPECT_EQ(HFUN(65504.0f), 31743);
+  EXPECT_EQ(HFUN(65535.0f), 31744); /* FP16 inf */
+  EXPECT_EQ(HFUN(1.0e6f), 31744);   /* FP16 inf */
+  EXPECT_EQ(HFUN(std::numeric_limits<float>::infinity()), 31744);
+  EXPECT_EQ(HFUN(std::numeric_limits<float>::max()), 31744);
+  EXPECT_EQ(HFUN(std::numeric_limits<float>::quiet_NaN()), 32256);
+  EXPECT_EQ(HFUN(-0.0f), 32768);
+  EXPECT_EQ(HFUN(-5.960464478e-08f), 32769);
+  EXPECT_EQ(HFUN(-0.4172363281f), 46765);
+  EXPECT_EQ(HFUN(-1.0f), 48128);
+  EXPECT_EQ(HFUN(-78.3125f), 54501);
+  EXPECT_EQ(HFUN(-123.5f), 55224);
+  EXPECT_EQ(HFUN(-65504.0f), 64511);
+  EXPECT_EQ(HFUN(-65536.0f), 64512); /* FP16 -inf */
+  EXPECT_EQ(HFUN(-1.0e6f), 64512);   /* FP16 -inf */
+  EXPECT_EQ(HFUN(-std::numeric_limits<float>::infinity()), 64512);
+#undef HFUN
+}
+
+#ifdef DO_PERF_TESTS
+
+/*
+ * Performance numbers of various other solutions, all on Ryzen 5950X, VS2022.
+ * This is time taken to convert 100 million numbers FP16 -> FP32.
+ *
+ * - CPU: F16C instructions 44ms
+ * - OpenEXR/Imath: 21ms
+ * - blender::math::half_to_float: 164ms
+ * - convert_float_formats from VK_data_conversion.hh: 244ms [converts 2046 values wrong]
+ *
+ * On Mac M1 Max (Clang 15):
+ * - blender::math::half_to_float: 127ms (C), 97ms (NEON vcvt)
+ */
+TEST(math_half_perf, half_to_float_scalar)
+{
+  double t0 = BLI_time_now_seconds();
+  size_t sum = 0;
+  for (int i = 0; i < 100'000'000; i++) {
+    float f = blender::math::half_to_float(uint16_t(i & 0xFFFF));
+    uint32_t fu;
+    memcpy(&fu, &f, sizeof(f));
+    sum += fu;
+  }
+  double t1 = BLI_time_now_seconds();
+  printf("- FP16->FP32 blimath: %.3fs sum %zu\n", t1 - t0, sum);
+}
+
+/*
+ * Performance numbers of various other solutions, all on Ryzen 5950X, VS2022.
+ * This is time taken to convert 100 million numbers FP32 -> FP16.
+ *
+ * - CPU: F16C instructions 61ms
+ * - OpenEXR/Imath: 240ms
+ * - blender::math::float_to_half: 242ms
+ * - convert_float_formats from VK_data_conversion.hh: 247ms [converts many values wrong]
+ *
+ * On Mac M1 Max (Clang 15):
+ * - blender::math::half_to_float: 198ms (C), 97ms (NEON vcvt)
+ */
+TEST(math_half_perf, float_to_half_scalar_math)  // 242ms
+{
+  double t0 = BLI_time_now_seconds();
+  uint32_t sum = 0;
+  for (int i = 0; i < 100'000'000; i++) {
+    float f = ((i & 0xFFFF) - 0x8000) + 0.1f;
+    uint16_t h = blender::math::float_to_half(f);
+    sum += h;
+  }
+  double t1 = BLI_time_now_seconds();
+  printf("- FP32->FP16 blimath: %.3fs sum %u\n", t1 - t0, sum);
+}
+
+#endif  // #ifdef DO_PERF_TESTS
+
+}  // namespace blender::tests
--- a/source/blender/editors/space_view3d/view3d_draw.cc
+++ b/source/blender/editors/space_view3d/view3d_draw.cc
@@ -9,6 +9,7 @@
 #include <cmath>

 #include "BLI_listbase.h"
+#include "BLI_math_half.hh"
 #include "BLI_math_matrix.h"
 #include "BLI_math_rotation.h"
 #include "BLI_rect.h"
@@ -2759,15 +2760,15 @@ bool ViewportColorSampleSession::sample(const int mval[2], float r_col[3])

  blender::ushort4 pixel = data[mval[1] * tex_w + mval[0]];

-  if (half_to_float(pixel.w) < 0.5f) {
+  if (blender::math::half_to_float(pixel.w) < 0.5f) {
    /* Background etc. are not rendered to the viewport texture, so fall back to basic color
     * picking for those. */
    return false;
  }

-  r_col[0] = half_to_float(pixel.x);
-  r_col[1] = half_to_float(pixel.y);
-  r_col[2] = half_to_float(pixel.z);
+  r_col[0] = blender::math::half_to_float(pixel.x);
+  r_col[1] = blender::math::half_to_float(pixel.y);
+  r_col[2] = blender::math::half_to_float(pixel.z);

  return true;
 }
--- a/source/blender/gpu/vulkan/vk_data_conversion.cc
+++ b/source/blender/gpu/vulkan/vk_data_conversion.cc
@@ -12,6 +12,7 @@
 #include "gpu_vertex_format_private.hh"

 #include "BLI_color.hh"
+#include "BLI_math_half.hh"

 namespace blender::gpu {

@@ -802,12 +803,12 @@ void convert(DestinationType &dst, const SourceType &src)

 static void convert(F16 &dst, const F32 &src)
 {
-  dst.value = convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value));
+  dst.value = math::float_to_half(src.value);
 }

 static void convert(F32 &dst, const F16 &src)
 {
-  dst.value = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.value));
+  dst.value = math::half_to_float(src.value);
 }

 static void convert(SRGBA8 &dst, const FLOAT4 &src)
@@ -822,17 +823,17 @@ static void convert(FLOAT4 &dst, const SRGBA8 &src)

 static void convert(FLOAT3 &dst, const HALF4 &src)
 {
-  dst.value.x = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.get_r()));
-  dst.value.y = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.get_g()));
-  dst.value.z = uint32_t_to_float(convert_float_formats<FormatF32, FormatF16>(src.get_b()));
+  dst.value.x = math::half_to_float(src.get_r());
+  dst.value.y = math::half_to_float(src.get_g());
+  dst.value.z = math::half_to_float(src.get_b());
 }

 static void convert(HALF4 &dst, const FLOAT3 &src)
 {
-  dst.set_r(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value.x)));
-  dst.set_g(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value.y)));
-  dst.set_b(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(src.value.z)));
-  dst.set_a(convert_float_formats<FormatF16, FormatF32>(float_to_uint32_t(1.0f)));
+  dst.set_r(math::float_to_half(src.value.x));
+  dst.set_g(math::float_to_half(src.value.y));
+  dst.set_b(math::float_to_half(src.value.z));
+  dst.set_a(0x3c00); /* FP16 1.0 */
 }

 static void convert(FLOAT3 &dst, const FLOAT4 &src)