There is a known precision bug in the current HIP compiler version (RDNA2 family on Windows). The bug has already been fixed, and the fix will be available in a future HIP SDK release. Enabling more precise math prevents the artifacts, but it may cause a 5-10% performance drop in some scenes. Fix #136138: Microfacet BSDF. Fix #136449: Hair BSDF. Pull Request: https://projects.blender.org/blender/blender/pulls/136341
This commit is contained in:
committed by
Brecht Van Lommel
parent
821e0024d4
commit
ae710101f5
@@ -261,6 +261,11 @@ string HIPDevice::compile_kernel(const uint kernel_features, const char *name, c
|
||||
|
||||
const char *const kernel_ext = "genco";
|
||||
std::string options = "-Wno-parentheses-equality -Wno-unused-value -ffast-math";
|
||||
if (hipNeedPreciseMath(arch)) {
|
||||
options.append(
|
||||
" -fhip-fp32-correctly-rounded-divide-sqrt -fno-gpu-approx-transcendentals "
|
||||
"-fgpu-flush-denormals-to-zero -ffp-contract=off");
|
||||
}
|
||||
|
||||
# ifndef NDEBUG
|
||||
options.append(" -save-temps");
|
||||
|
||||
@@ -77,6 +77,18 @@ static inline bool hipIsRDNA2OrNewer(const int hipDevId)
|
||||
return (major > 10 || (major == 10 && minor >= 3));
|
||||
}
|
||||
|
||||
/* Tell whether kernels for the given HIP architecture name should be compiled with
 * stricter floating point options.
 *
 * On Windows this is true for exactly the RDNA2 targets listed below, which are affected
 * by a compiler bug. On every other platform the architecture is ignored and the result
 * is always false. */
static inline bool hipNeedPreciseMath(const std::string &arch)
{
# ifdef _WIN32
  /* RDNA2 GPUs that need the stricter math options (compiler bug on Windows). */
  static const char *const rdna2_archs[] = {
      "gfx1030", "gfx1031", "gfx1032", "gfx1033", "gfx1034", "gfx1035", "gfx1036"};

  for (const char *const rdna2_arch : rdna2_archs) {
    if (arch == rdna2_arch) {
      return true;
    }
  }
# else
  /* The workaround only applies on Windows; silence the unused parameter warning. */
  (void)arch;
# endif

  return false;
}
|
||||
|
||||
static inline bool hipSupportsDeviceOIDN(const int hipDevId)
|
||||
{
|
||||
/* Matches HIPDevice::getArch in HIP. */
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
#ifdef WITH_HIPRT
|
||||
|
||||
# include "device/hiprt/device_impl.h"
|
||||
# include "device/hip/util.h"
|
||||
# include "kernel/device/hiprt/globals.h"
|
||||
|
||||
# include "util/log.h"
|
||||
@@ -225,6 +226,11 @@ string HIPRTDevice::compile_kernel(const uint kernel_features, const char *name,
|
||||
options.append(
|
||||
"-Wno-parentheses-equality -Wno-unused-value -ffast-math -O3 -std=c++17 -D __HIPRT__");
|
||||
options.append(" --offload-arch=").append(arch.c_str());
|
||||
if (hipNeedPreciseMath(arch)) {
|
||||
options.append(
|
||||
" -fhip-fp32-correctly-rounded-divide-sqrt -fno-gpu-approx-transcendentals "
|
||||
"-fgpu-flush-denormals-to-zero -ffp-contract=off");
|
||||
}
|
||||
# ifdef WITH_NANOVDB
|
||||
options.append(" -D WITH_NANOVDB");
|
||||
# endif
|
||||
|
||||
@@ -643,6 +643,21 @@ endif()
|
||||
|
||||
# HIP module
|
||||
|
||||
# Workaround for SDK 6.3.42560-881c2d702 compiler bug on RDNA2
# Issue #136138
#
# Sets the variable named by `math_flag` in the caller's scope to the list of
# stricter floating point compile options required for `arch`, or to an empty
# string when the workaround does not apply (non-Windows builds, or any
# architecture other than the RDNA2 targets listed below).
function(get_hip_math_flag arch math_flag)
  # The architecture is quoted so an empty or variable-like value is compared
  # as a plain string, and the pattern is anchored so only the exact RDNA2
  # names match (the same exact comparison hipNeedPreciseMath() performs).
  if(WIN32 AND ("${arch}" MATCHES "^(gfx1030|gfx1031|gfx1032|gfx1033|gfx1034|gfx1035|gfx1036)$"))
    set(result
      -fhip-fp32-correctly-rounded-divide-sqrt
      -fno-gpu-approx-transcendentals
      -fgpu-flush-denormals-to-zero
      -ffp-contract=off)
  else()
    set(result "")
  endif()
  # Return the result through the caller-provided output variable name.
  set(${math_flag} "${result}" PARENT_SCOPE)
endfunction()
|
||||
|
||||
if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
# build for each arch
|
||||
set(hip_sources device/hip/kernel.cpp
|
||||
@@ -683,6 +698,8 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
set(hip_opt_flags)
|
||||
endif()
|
||||
|
||||
get_hip_math_flag(${arch} math_flag)
|
||||
|
||||
set(hip_flags
|
||||
${hip_flags}
|
||||
--offload-arch=${arch}
|
||||
@@ -698,6 +715,7 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIP)
|
||||
-Wno-parentheses-equality
|
||||
-Wno-unused-value
|
||||
-ffast-math
|
||||
${math_flag}
|
||||
${hip_opt_flags}
|
||||
-o ${CMAKE_CURRENT_BINARY_DIR}/${hip_file})
|
||||
|
||||
@@ -793,10 +811,12 @@ if(WITH_CYCLES_HIP_BINARIES AND WITH_CYCLES_DEVICE_HIPRT)
|
||||
|
||||
set(hiprt_hipfb)
|
||||
foreach(arch ${CYCLES_HIP_BINARIES_ARCH})
|
||||
get_hip_math_flag(${arch} math_flag)
|
||||
set(hiprt_file ${CMAKE_CURRENT_BINARY_DIR}/kernel_rt_${arch}.hipfb)
|
||||
set(hiprt_file_compressed ${hiprt_file}.zst)
|
||||
set(hiprt_flags
|
||||
${hiprt_compile_flags}
|
||||
${math_flag}
|
||||
--offload-arch=${arch}
|
||||
-o ${hiprt_file})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user