From f96f3c33af64bb2c422a438b19e203b84a03e21d Mon Sep 17 00:00:00 2001 From: Lukas Stockner Date: Sun, 29 Sep 2024 23:38:05 +0200 Subject: [PATCH] Cycles: Add some more math optimizations This enables three additional math optimizations: -ffp-contract=fast (enables FMA generation) -freciprocal-math (enables x/y -> x*(1/y)) -fassociative-math (enables e.g. a*b + c*b -> (a+c)*b) These are used on Windows and HIP anyway, so our code can't expect exact IEEE semantics in any case. The only difference between the new set and -ffast-math is that we don't use -ffinite-math-only since this causes issues with the BVH (see ce1f2e271d) and breaks e.g. isnan. This causes a ~1.5% speedup in my very quick test, but the gain might be higher for some more math-intensive cases. Pull Request: https://projects.blender.org/blender/blender/pulls/128342 --- intern/cycles/CMakeLists.txt | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/intern/cycles/CMakeLists.txt b/intern/cycles/CMakeLists.txt index d3b634cd6ed..f60dfb7e488 100644 --- a/intern/cycles/CMakeLists.txt +++ b/intern/cycles/CMakeLists.txt @@ -158,6 +158,14 @@ elseif(CMAKE_COMPILER_IS_GNUCC OR (CMAKE_CXX_COMPILER_ID MATCHES "Clang")) list(APPEND CYCLES_MATH_FLAGS "-fno-math-errno") # Let compiler optimize 0.0 - x without worrying about signed zeros. list(APPEND CYCLES_MATH_FLAGS "-fno-signed-zeros") + # Let the compiler generate fused multiply-add instructions. + list(APPEND CYCLES_MATH_FLAGS "-ffp-contract=fast") + # Let the compiler replace x/y with x*(1/y). + list(APPEND CYCLES_MATH_FLAGS "-freciprocal-math") + # Let the compiler reorder terms to save operations. + list(APPEND CYCLES_MATH_FLAGS "-fassociative-math") + # Don't enable -ffinite-math-only since the BVH code relies on NaNs. + # Otherwise, we could just use -ffast-math. if(CMAKE_COMPILER_IS_GNUCC) # Assume no signal trapping for better code generation.