From 2bf6d0fd7161ed595787d749b36397aa983f3621 Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sat, 11 Jan 2025 20:27:19 +0100 Subject: [PATCH] Cleanup: Cycles: Remove unnecessary SSE4.2 CPU kernel This is the minimum requirement, so just the regular kernel already includes these instructions if supported by the CPU architecture. --- intern/cycles/device/cpu/device.cpp | 1 - intern/cycles/device/cpu/kernel.cpp | 3 +-- intern/cycles/device/cpu/kernel_function.h | 19 +++---------- intern/cycles/kernel/CMakeLists.txt | 5 ---- intern/cycles/kernel/device/cpu/kernel.h | 3 --- .../cycles/kernel/device/cpu/kernel_sse42.cpp | 27 ------------------- intern/cycles/util/CMakeLists.txt | 4 --- intern/cycles/util/debug.cpp | 1 - intern/cycles/util/optimization.h | 8 +----- intern/cycles/util/transform.h | 6 ----- intern/cycles/util/transform_sse42.cpp | 14 ---------- 11 files changed, 6 insertions(+), 85 deletions(-) delete mode 100644 intern/cycles/kernel/device/cpu/kernel_sse42.cpp delete mode 100644 intern/cycles/util/transform_sse42.cpp diff --git a/intern/cycles/device/cpu/device.cpp b/intern/cycles/device/cpu/device.cpp index 0a855d90a16..c89a4d047c7 100644 --- a/intern/cycles/device/cpu/device.cpp +++ b/intern/cycles/device/cpu/device.cpp @@ -49,7 +49,6 @@ void device_cpu_info(vector &devices) string device_cpu_capabilities() { string capabilities; - capabilities += system_cpu_support_sse42() ? "SSE42 " : ""; capabilities += system_cpu_support_avx2() ? "AVX2" : ""; if (capabilities[capabilities.size() - 1] == ' ') { capabilities.resize(capabilities.size() - 1); diff --git a/intern/cycles/device/cpu/kernel.cpp b/intern/cycles/device/cpu/kernel.cpp index af8e27560da..523573a19ec 100644 --- a/intern/cycles/device/cpu/kernel.cpp +++ b/intern/cycles/device/cpu/kernel.cpp @@ -8,8 +8,7 @@ CCL_NAMESPACE_BEGIN -#define KERNEL_FUNCTIONS(name) \ - KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse42, name), KERNEL_NAME_EVAL(cpu_avx2, name) +#define KERNEL_FUNCTIONS(name) KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_avx2, name) #define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name)) #define REGISTER_KERNEL_FILM_CONVERT(name) \ diff --git a/intern/cycles/device/cpu/kernel_function.h b/intern/cycles/device/cpu/kernel_function.h index 126b9cd7cdf..f45059a079e 100644 --- a/intern/cycles/device/cpu/kernel_function.h +++ b/intern/cycles/device/cpu/kernel_function.h @@ -13,14 +13,12 @@ CCL_NAMESPACE_BEGIN * * Provides a function-call-like API which gets routed to the most suitable implementation. * - * For example, on a computer which only has SSE4.2 the kernel_sse42 will be used. */ + * For example, on a computer which only has AVX2 the kernel_avx2 will be used. */ template class CPUKernelFunction { public: - CPUKernelFunction(FunctionType kernel_default, - FunctionType kernel_sse42, - FunctionType kernel_avx2) + CPUKernelFunction(FunctionType kernel_default, FunctionType kernel_avx2) { - kernel_info_ = get_best_kernel_info(kernel_default, kernel_sse42, kernel_avx2); + kernel_info_ = get_best_kernel_info(kernel_default, kernel_avx2); } template auto operator()(Args... args) const @@ -53,12 +51,9 @@ template class CPUKernelFunction { FunctionType kernel; }; - KernelInfo get_best_kernel_info(FunctionType kernel_default, - FunctionType kernel_sse42, - FunctionType kernel_avx2) + KernelInfo get_best_kernel_info(FunctionType kernel_default, FunctionType kernel_avx2) { /* Silence warnings about unused variables when compiling without some architectures. */ - (void)kernel_sse42; (void)kernel_avx2; #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2 @@ -67,12 +62,6 @@ template class CPUKernelFunction { } #endif -#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE42 - if (DebugFlags().cpu.has_sse42() && system_cpu_support_sse42()) { - return KernelInfo("SSE4.2", kernel_sse42); - } -#endif - return KernelInfo("default", kernel_default); } diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 134367cd2c0..03ab6452d8b 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -15,7 +15,6 @@ set(INC_SYS set(SRC_KERNEL_DEVICE_CPU device/cpu/globals.cpp device/cpu/kernel.cpp - device/cpu/kernel_sse42.cpp device/cpu/kernel_avx2.cpp ) @@ -1348,10 +1347,6 @@ if(DEFINED CYCLES_KERNEL_FLAGS) set_source_files_properties(device/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}") endif() -if(CXX_HAS_SSE42) - set_source_files_properties(device/cpu/kernel_sse42.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE42_FLAGS}") -endif() - if(CXX_HAS_AVX2) set_source_files_properties(device/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}") endif() diff --git a/intern/cycles/kernel/device/cpu/kernel.h b/intern/cycles/kernel/device/cpu/kernel.h index 4b5ae18af40..c03677d9cd9 100644 --- a/intern/cycles/kernel/device/cpu/kernel.h +++ b/intern/cycles/kernel/device/cpu/kernel.h @@ -35,9 +35,6 @@ void kernel_global_memory_copy(KernelGlobalsCPU *kg, #define KERNEL_ARCH cpu #include "kernel/device/cpu/kernel_arch.h" -#define KERNEL_ARCH cpu_sse42 -#include "kernel/device/cpu/kernel_arch.h" - #define KERNEL_ARCH cpu_avx2 #include "kernel/device/cpu/kernel_arch.h" diff --git a/intern/cycles/kernel/device/cpu/kernel_sse42.cpp b/intern/cycles/kernel/device/cpu/kernel_sse42.cpp deleted file mode 100644 index 2970d84b27a..00000000000 --- a/intern/cycles/kernel/device/cpu/kernel_sse42.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -/* Optimized CPU kernel entry points. This file is compiled with SSE42 - * optimization flags and nearly all functions inlined, while kernel.cpp - * is compiled without for other CPU's. */ - -#include "util/optimization.h" - -#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE42 -# define KERNEL_STUB -#else -/* SSE optimization disabled for now on 32 bit, see bug #36316. */ -# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86))) -# define __KERNEL_SSE__ -# define __KERNEL_SSE2__ -# define __KERNEL_SSE3__ -# define __KERNEL_SSSE3__ -# define __KERNEL_SSE42__ -# endif -#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE42 */ - -#include "kernel/device/cpu/globals.h" -#include "kernel/device/cpu/kernel.h" -#define KERNEL_ARCH cpu_sse42 -#include "kernel/device/cpu/kernel_arch_impl.h" diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index cb867db710f..df0ea48de13 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -27,7 +27,6 @@ set(SRC time.cpp transform.cpp transform_avx2.cpp - transform_sse42.cpp windows.cpp ) @@ -125,9 +124,6 @@ set(SRC_HEADERS xml.h ) -if(CXX_HAS_SSE42) - set_source_files_properties(transform_sse42.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE42_FLAGS}") -endif() if(CXX_HAS_AVX2) set_source_files_properties(transform_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}") endif() diff --git a/intern/cycles/util/debug.cpp b/intern/cycles/util/debug.cpp index 664f3df0aae..a15eecd5bfa 100644 --- a/intern/cycles/util/debug.cpp +++ b/intern/cycles/util/debug.cpp @@ -27,7 +27,6 @@ void DebugFlags::CPU::reset() } while (0) CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2"); - CHECK_CPU_FLAGS(sse42, "CYCLES_CPU_NO_SSE42"); #undef STRINGIFY #undef CHECK_CPU_FLAGS diff --git a/intern/cycles/util/optimization.h b/intern/cycles/util/optimization.h index cd9bc9a0f5f..4eb231eae2e 100644 --- a/intern/cycles/util/optimization.h +++ b/intern/cycles/util/optimization.h @@ -8,16 +8,10 @@ /* x86 * - * Compile a regular and SSE42 kernel. */ + * Compile a regular kernel. */ # if defined(i386) || defined(_M_IX86) -/* We require minimum SSE4.2 support on x86, so auto enable. */ -# define __KERNEL_SSE42__ -# ifdef WITH_KERNEL_SSE42 -# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE42 -# endif - /* x86-64 * * Compile a regular (includes SSE4.2) and AVX2 kernel. */ diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h index fcde90a15bd..db0938e6d66 100644 --- a/intern/cycles/util/transform.h +++ b/intern/cycles/util/transform.h @@ -408,7 +408,6 @@ ccl_device_inline float4 quat_interpolate(const float4 q1, const float4 q2, cons } #ifndef __KERNEL_GPU__ -void transform_inverse_cpu_sse42(const Transform &tfm, Transform &itfm); void transform_inverse_cpu_avx2(const Transform &tfm, Transform &itfm); #endif @@ -497,11 +496,6 @@ ccl_device_inline Transform transform_inverse(const Transform tfm) transform_inverse_cpu_avx2(tfm, itfm); return itfm; } - if (system_cpu_support_sse42()) { - Transform itfm; - transform_inverse_cpu_sse42(tfm, itfm); - return itfm; - } #endif return transform_inverse_impl(tfm); diff --git a/intern/cycles/util/transform_sse42.cpp b/intern/cycles/util/transform_sse42.cpp deleted file mode 100644 index b47392e6f34..00000000000 --- a/intern/cycles/util/transform_sse42.cpp +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#include "util/transform.h" - -CCL_NAMESPACE_BEGIN - -void transform_inverse_cpu_sse42(const Transform &tfm, Transform &itfm) -{ - itfm = transform_inverse_impl(tfm); -} - -CCL_NAMESPACE_END