Fix #136138, #136449: Cycles HIP RDNA2 white and blue render artifacts

There is a known precision bug in the current HIP compiler version (RDNA2 family on Windows) that has already been fixed; the fix will be available in a future HIP SDK release. Until then, enabling more precise math options prevents the artifacts. This may cause a 5-10% performance drop in some scenes. Fix #136138: Microfacet BSDF. Fix #136449: Hair BSDF. Pull Request: https://projects.blender.org/blender/blender/pulls/136341
2025-03-25 18:21:16 +01:00
parent 821e0024d4
commit ae710101f5
4 changed files with 43 additions and 0 deletions
--- a/intern/cycles/device/hip/device_impl.cpp
+++ b/intern/cycles/device/hip/device_impl.cpp
@@ -261,6 +261,11 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c

  const char *const kernel_ext = "genco";
  std::string options = "-Wno-parentheses-equality -Wno-unused-value -ffast-math";
+  if (hipNeedPreciseMath(arch)) {
+    options.append(
+        " -fhip-fp32-correctly-rounded-divide-sqrt -fno-gpu-approx-transcendentals "
+        "-fgpu-flush-denormals-to-zero -ffp-contract=off");
+  }

 #  ifndef NDEBUG
  options.append(" -save-temps");
--- a/intern/cycles/device/hip/util.h
+++ b/intern/cycles/device/hip/util.h
@@ -77,6 +77,18 @@ static inline bool hipIsRDNA2OrNewer(const int hipDevId)
  return (major > 10 || (major == 10 && minor >= 3));
 }

+static inline bool hipNeedPreciseMath(const std::string &arch)
+{
+#  ifdef _WIN32
+  /* HIP compiler bug workaround (Windows): only RDNA2 targets gfx1030-gfx1036 need strict math. */
+  return (arch == "gfx1030" || arch == "gfx1031" || arch == "gfx1032" || arch == "gfx1033" ||
+          arch == "gfx1034" || arch == "gfx1035" || arch == "gfx1036");
+#  else
+  (void)arch; /* `arch` is not otherwise used in this branch. */
+  return false; /* Outside Windows builds no architecture requests the stricter options. */
+#  endif
+}
+
 static inline bool hipSupportsDeviceOIDN(const int hipDevId)
 {
  /* Matches HIPDevice::getArch in HIP. */
--- a/intern/cycles/device/hiprt/device_impl.cpp
+++ b/intern/cycles/device/hiprt/device_impl.cpp
@@ -5,6 +5,7 @@
 #ifdef WITH_HIPRT

 #  include "device/hiprt/device_impl.h"
+#  include "device/hip/util.h"
 #  include "kernel/device/hiprt/globals.h"

 #  include "util/log.h"
@@ -225,6 +226,11 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
  options.append(
      "-Wno-parentheses-equality -Wno-unused-value -ffast-math -O3 -std=c++17 -D __HIPRT__");
  options.append(" --offload-arch=").append(arch.c_str());
+  if (hipNeedPreciseMath(arch)) {
+    options.append(
+        " -fhip-fp32-correctly-rounded-divide-sqrt -fno-gpu-approx-transcendentals "
+        "-fgpu-flush-denormals-to-zero -ffp-contract=off");
+  }
 #  ifdef WITH_NANOVDB
  options.append(" -D WITH_NANOVDB");
 #  endif
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -643,6 +643,21 @@ endif()

 # HIP module

+# Workaround for SDK 6.3.42560-881c2d702 compiler bug on RDNA2: yields stricter HIP math flags
+# for gfx1030-gfx1036 when WIN32 is set, otherwise an empty string. Issue #136138
+function(get_hip_math_flag arch math_flag)
+  if(WIN32 AND (${arch} MATCHES "gfx1030|gfx1031|gfx1032|gfx1033|gfx1034|gfx1035|gfx1036"))
+      set(result  # NOTE(review): MATCHES above is an unanchored regex search, not an exact compare.
+        -fhip-fp32-correctly-rounded-divide-sqrt
+        -fno-gpu-approx-transcendentals
+        -fgpu-flush-denormals-to-zero
+        -ffp-contract=off)
+  else()  # Any other platform or architecture: no extra flags.
+      set(result "")
+  endif()
+  set(${math_flag} "${result}" PARENT_SCOPE)  # Output goes to the caller's variable named by math_flag.
+endfunction()
+
 if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
  # build for each arch
  set(hip_sources device/hip/kernel.cpp
@@ -683,6 +698,8 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
      set(hip_opt_flags)
    endif()

+    get_hip_math_flag(${arch} math_flag)
+
    set(hip_flags
      ${hip_flags}
      --offload-arch=${arch}
@@ -698,6 +715,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
      -Wno-parentheses-equality
      -Wno-unused-value
      -ffast-math
+      ${math_flag}
      ${hip_opt_flags}
      -o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})

@@ -793,10 +811,12 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIPRT)

  set(hiprt_hipfb)
  foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
+    get_hip_math_flag(${arch} math_flag)
    set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_${arch}.hipfb)
    set(hiprt_file_compressed ${hiprt_file}.zst)
    set(hiprt_flags
    ${hiprt_compile_flags}
+    ${math_flag}
    --offload-arch=${arch}
    -o ${hiprt_file})