Cleanup: Cycles: Remove unnecessary SSE4.2 CPU kernel

SSE4.2 is already the minimum requirement, so the regular kernel already
includes these instructions whenever the CPU architecture supports them.
This commit is contained in:
Brecht Van Lommel
2025-01-11 20:27:19 +01:00
parent 89b793f130
commit 2bf6d0fd71
11 changed files with 6 additions and 85 deletions

View File

@@ -49,7 +49,6 @@ void device_cpu_info(vector<DeviceInfo> &devices)
string device_cpu_capabilities()
{
string capabilities;
capabilities += system_cpu_support_sse42() ? "SSE42 " : "";
capabilities += system_cpu_support_avx2() ? "AVX2" : "";
if (capabilities[capabilities.size() - 1] == ' ') {
capabilities.resize(capabilities.size() - 1);

View File

@@ -8,8 +8,7 @@
CCL_NAMESPACE_BEGIN
#define KERNEL_FUNCTIONS(name) \
KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse42, name), KERNEL_NAME_EVAL(cpu_avx2, name)
#define KERNEL_FUNCTIONS(name) KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_avx2, name)
#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
#define REGISTER_KERNEL_FILM_CONVERT(name) \

View File

@@ -13,14 +13,12 @@ CCL_NAMESPACE_BEGIN
*
* Provides a function-call-like API which gets routed to the most suitable implementation.
*
* For example, on a computer which only has SSE4.2 the kernel_sse42 will be used. */
* For example, on a computer which only has AVX2 the kernel_avx2 will be used. */
template<typename FunctionType> class CPUKernelFunction {
public:
CPUKernelFunction(FunctionType kernel_default,
FunctionType kernel_sse42,
FunctionType kernel_avx2)
CPUKernelFunction(FunctionType kernel_default, FunctionType kernel_avx2)
{
kernel_info_ = get_best_kernel_info(kernel_default, kernel_sse42, kernel_avx2);
kernel_info_ = get_best_kernel_info(kernel_default, kernel_avx2);
}
template<typename... Args> auto operator()(Args... args) const
@@ -53,12 +51,9 @@ template<typename FunctionType> class CPUKernelFunction {
FunctionType kernel;
};
KernelInfo get_best_kernel_info(FunctionType kernel_default,
FunctionType kernel_sse42,
FunctionType kernel_avx2)
KernelInfo get_best_kernel_info(FunctionType kernel_default, FunctionType kernel_avx2)
{
/* Silence warnings about unused variables when compiling without some architectures. */
(void)kernel_sse42;
(void)kernel_avx2;
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
@@ -67,12 +62,6 @@ template<typename FunctionType> class CPUKernelFunction {
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE42
if (DebugFlags().cpu.has_sse42() && system_cpu_support_sse42()) {
return KernelInfo("SSE4.2", kernel_sse42);
}
#endif
return KernelInfo("default", kernel_default);
}

View File

@@ -15,7 +15,6 @@ set(INC_SYS
set(SRC_KERNEL_DEVICE_CPU
device/cpu/globals.cpp
device/cpu/kernel.cpp
device/cpu/kernel_sse42.cpp
device/cpu/kernel_avx2.cpp
)
@@ -1348,10 +1347,6 @@ if(DEFINED CYCLES_KERNEL_FLAGS)
set_source_files_properties(device/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
endif()
if(CXX_HAS_SSE42)
set_source_files_properties(device/cpu/kernel_sse42.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE42_FLAGS}")
endif()
if(CXX_HAS_AVX2)
set_source_files_properties(device/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}")
endif()

View File

@@ -35,9 +35,6 @@ void kernel_global_memory_copy(KernelGlobalsCPU *kg,
#define KERNEL_ARCH cpu
#include "kernel/device/cpu/kernel_arch.h"
#define KERNEL_ARCH cpu_sse42
#include "kernel/device/cpu/kernel_arch.h"
#define KERNEL_ARCH cpu_avx2
#include "kernel/device/cpu/kernel_arch.h"

View File

@@ -1,27 +0,0 @@
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
/* Optimized CPU kernel entry points. This file is compiled with SSE42
* optimization flags and nearly all functions inlined, while kernel.cpp
* is compiled without for other CPU's. */
#include "util/optimization.h"
#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE42
# define KERNEL_STUB
#else
/* SSE optimization disabled for now on 32 bit, see bug #36316. */
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
# define __KERNEL_SSE__
# define __KERNEL_SSE2__
# define __KERNEL_SSE3__
# define __KERNEL_SSSE3__
# define __KERNEL_SSE42__
# endif
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE42 */
#include "kernel/device/cpu/globals.h"
#include "kernel/device/cpu/kernel.h"
#define KERNEL_ARCH cpu_sse42
#include "kernel/device/cpu/kernel_arch_impl.h"

View File

@@ -27,7 +27,6 @@ set(SRC
time.cpp
transform.cpp
transform_avx2.cpp
transform_sse42.cpp
windows.cpp
)
@@ -125,9 +124,6 @@ set(SRC_HEADERS
xml.h
)
if(CXX_HAS_SSE42)
set_source_files_properties(transform_sse42.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE42_FLAGS}")
endif()
if(CXX_HAS_AVX2)
set_source_files_properties(transform_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}")
endif()

View File

@@ -27,7 +27,6 @@ void DebugFlags::CPU::reset()
} while (0)
CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2");
CHECK_CPU_FLAGS(sse42, "CYCLES_CPU_NO_SSE42");
#undef STRINGIFY
#undef CHECK_CPU_FLAGS

View File

@@ -8,16 +8,10 @@
/* x86
*
* Compile a regular and SSE42 kernel. */
* Compile a regular kernel. */
# if defined(i386) || defined(_M_IX86)
/* We require minimum SSE4.2 support on x86, so auto enable. */
# define __KERNEL_SSE42__
# ifdef WITH_KERNEL_SSE42
# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE42
# endif
/* x86-64
*
* Compile a regular (includes SSE4.2) and AVX2 kernel. */

View File

@@ -408,7 +408,6 @@ ccl_device_inline float4 quat_interpolate(const float4 q1, const float4 q2, cons
}
#ifndef __KERNEL_GPU__
void transform_inverse_cpu_sse42(const Transform &tfm, Transform &itfm);
void transform_inverse_cpu_avx2(const Transform &tfm, Transform &itfm);
#endif
@@ -497,11 +496,6 @@ ccl_device_inline Transform transform_inverse(const Transform tfm)
transform_inverse_cpu_avx2(tfm, itfm);
return itfm;
}
if (system_cpu_support_sse42()) {
Transform itfm;
transform_inverse_cpu_sse42(tfm, itfm);
return itfm;
}
#endif
return transform_inverse_impl(tfm);

View File

@@ -1,14 +0,0 @@
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#include "util/transform.h"
CCL_NAMESPACE_BEGIN
void transform_inverse_cpu_sse42(const Transform &tfm, Transform &itfm)
{
itfm = transform_inverse_impl(tfm);
}
CCL_NAMESPACE_END