diff --git a/intern/cycles/device/hip/device_impl.cpp b/intern/cycles/device/hip/device_impl.cpp index 567e9139452..6f147ace413 100644 --- a/intern/cycles/device/hip/device_impl.cpp +++ b/intern/cycles/device/hip/device_impl.cpp @@ -261,6 +261,11 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c const char *const kernel_ext = "genco"; std::string options = "-Wno-parentheses-equality -Wno-unused-value -ffast-math"; + if (hipNeedPreciseMath(arch)) { + options.append( + " -fhip-fp32-correctly-rounded-divide-sqrt -fno-gpu-approx-transcendentals " + "-fgpu-flush-denormals-to-zero -ffp-contract=off"); + } # ifndef NDEBUG options.append(" -save-temps"); diff --git a/intern/cycles/device/hip/util.h b/intern/cycles/device/hip/util.h index 54caa9379c0..7c4d776cc1d 100644 --- a/intern/cycles/device/hip/util.h +++ b/intern/cycles/device/hip/util.h @@ -77,6 +77,18 @@ static inline bool hipIsRDNA2OrNewer(const int hipDevId) return (major > 10 || (major == 10 && minor >= 3)); } +static inline bool hipNeedPreciseMath(const std::string &arch) +{ +# ifdef _WIN32 + /* Enable stricter math options for RDNA2 GPUs (compiler bug on Windows, see issue #136138). */ + return (arch == "gfx1030" || arch == "gfx1031" || arch == "gfx1032" || arch == "gfx1033" || + arch == "gfx1034" || arch == "gfx1035" || arch == "gfx1036"); +# else + (void)arch; + return false; +# endif +} + static inline bool hipSupportsDeviceOIDN(const int hipDevId) { /* Matches HIPDevice::getArch in HIP. 
*/ diff --git a/intern/cycles/device/hiprt/device_impl.cpp b/intern/cycles/device/hiprt/device_impl.cpp index 0cedbd0da2e..92f71419e21 100644 --- a/intern/cycles/device/hiprt/device_impl.cpp +++ b/intern/cycles/device/hiprt/device_impl.cpp @@ -5,6 +5,7 @@ #ifdef WITH_HIPRT # include "device/hiprt/device_impl.h" +# include "device/hip/util.h" # include "kernel/device/hiprt/globals.h" # include "util/log.h" @@ -225,6 +226,11 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name, options.append( "-Wno-parentheses-equality -Wno-unused-value -ffast-math -O3 -std=c++17 -D __HIPRT__"); options.append(" --offload-arch=").append(arch.c_str()); + if (hipNeedPreciseMath(arch)) { + options.append( + " -fhip-fp32-correctly-rounded-divide-sqrt -fno-gpu-approx-transcendentals " + "-fgpu-flush-denormals-to-zero -ffp-contract=off"); + } # ifdef WITH_NANOVDB options.append(" -D WITH_NANOVDB"); # endif diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 05dc3c5979d..5791ba4a354 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -643,6 +643,21 @@ endif() # HIP module +# Workaround for SDK 6.3.42560-881c2d702 compiler bug on RDNA2 (Windows only). +# Issue #136138 +function(get_hip_math_flag arch math_flag) + if(WIN32 AND ("${arch}" MATCHES "^gfx103[0-6]$")) + set(result + -fhip-fp32-correctly-rounded-divide-sqrt + -fno-gpu-approx-transcendentals + -fgpu-flush-denormals-to-zero + -ffp-contract=off) + else() + set(result "") + endif() + set(${math_flag} "${result}" PARENT_SCOPE) +endfunction() + if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) # build for each arch set(hip_sources device/hip/kernel.cpp @@ -683,6 +698,8 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP) set(hip_opt_flags) endif() + get_hip_math_flag(${arch} math_flag) + set(hip_flags ${hip_flags} --offload-arch=${arch} @@ -698,6 +715,7 @@ if(WITH_CYCLES_HIP_BINARIES AND 
WITH_CYCLES_DEVICE_HIP) -Wno-parentheses-equality -Wno-unused-value -ffast-math + ${math_flag} ${hip_opt_flags} -o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file}) @@ -793,10 +811,12 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIPRT) set(hiprt_hipfb) foreach(arch ${CYCLES_HIP_BINARIES_ARCH}) + get_hip_math_flag(${arch} math_flag) set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_${arch}.hipfb) set(hiprt_file_compressed ${hiprt_file}.zst) set(hiprt_flags ${hiprt_compile_flags} + ${math_flag} --offload-arch=${arch} -o ${hiprt_file})