Cleanup: Cycles: Remove unnecessary SSE4.2 CPU kernel
SSE4.2 is already the minimum requirement, so the regular kernel already includes these instructions if supported by the CPU architecture.
This commit is contained in:
@@ -49,7 +49,6 @@ void device_cpu_info(vector<DeviceInfo> &devices)
|
||||
string device_cpu_capabilities()
|
||||
{
|
||||
string capabilities;
|
||||
capabilities += system_cpu_support_sse42() ? "SSE42 " : "";
|
||||
capabilities += system_cpu_support_avx2() ? "AVX2" : "";
|
||||
if (capabilities[capabilities.size() - 1] == ' ') {
|
||||
capabilities.resize(capabilities.size() - 1);
|
||||
|
||||
@@ -8,8 +8,7 @@
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#define KERNEL_FUNCTIONS(name) \
|
||||
KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse42, name), KERNEL_NAME_EVAL(cpu_avx2, name)
|
||||
#define KERNEL_FUNCTIONS(name) KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_avx2, name)
|
||||
|
||||
#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
|
||||
#define REGISTER_KERNEL_FILM_CONVERT(name) \
|
||||
|
||||
@@ -13,14 +13,12 @@ CCL_NAMESPACE_BEGIN
|
||||
*
|
||||
* Provides a function-call-like API which gets routed to the most suitable implementation.
|
||||
*
|
||||
* For example, on a computer which only has SSE4.2 the kernel_sse42 will be used. */
|
||||
* For example, on a computer which only has AVX2 the kernel_avx2 will be used. */
|
||||
template<typename FunctionType> class CPUKernelFunction {
|
||||
public:
|
||||
CPUKernelFunction(FunctionType kernel_default,
|
||||
FunctionType kernel_sse42,
|
||||
FunctionType kernel_avx2)
|
||||
CPUKernelFunction(FunctionType kernel_default, FunctionType kernel_avx2)
|
||||
{
|
||||
kernel_info_ = get_best_kernel_info(kernel_default, kernel_sse42, kernel_avx2);
|
||||
kernel_info_ = get_best_kernel_info(kernel_default, kernel_avx2);
|
||||
}
|
||||
|
||||
template<typename... Args> auto operator()(Args... args) const
|
||||
@@ -53,12 +51,9 @@ template<typename FunctionType> class CPUKernelFunction {
|
||||
FunctionType kernel;
|
||||
};
|
||||
|
||||
KernelInfo get_best_kernel_info(FunctionType kernel_default,
|
||||
FunctionType kernel_sse42,
|
||||
FunctionType kernel_avx2)
|
||||
KernelInfo get_best_kernel_info(FunctionType kernel_default, FunctionType kernel_avx2)
|
||||
{
|
||||
/* Silence warnings about unused variables when compiling without some architectures. */
|
||||
(void)kernel_sse42;
|
||||
(void)kernel_avx2;
|
||||
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
|
||||
@@ -67,12 +62,6 @@ template<typename FunctionType> class CPUKernelFunction {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE42
|
||||
if (DebugFlags().cpu.has_sse42() && system_cpu_support_sse42()) {
|
||||
return KernelInfo("SSE4.2", kernel_sse42);
|
||||
}
|
||||
#endif
|
||||
|
||||
return KernelInfo("default", kernel_default);
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,6 @@ set(INC_SYS
|
||||
set(SRC_KERNEL_DEVICE_CPU
|
||||
device/cpu/globals.cpp
|
||||
device/cpu/kernel.cpp
|
||||
device/cpu/kernel_sse42.cpp
|
||||
device/cpu/kernel_avx2.cpp
|
||||
)
|
||||
|
||||
@@ -1348,10 +1347,6 @@ if(DEFINED CYCLES_KERNEL_FLAGS)
|
||||
set_source_files_properties(device/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
|
||||
endif()
|
||||
|
||||
if(CXX_HAS_SSE42)
|
||||
set_source_files_properties(device/cpu/kernel_sse42.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE42_FLAGS}")
|
||||
endif()
|
||||
|
||||
if(CXX_HAS_AVX2)
|
||||
set_source_files_properties(device/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}")
|
||||
endif()
|
||||
|
||||
@@ -35,9 +35,6 @@ void kernel_global_memory_copy(KernelGlobalsCPU *kg,
|
||||
#define KERNEL_ARCH cpu
|
||||
#include "kernel/device/cpu/kernel_arch.h"
|
||||
|
||||
#define KERNEL_ARCH cpu_sse42
|
||||
#include "kernel/device/cpu/kernel_arch.h"
|
||||
|
||||
#define KERNEL_ARCH cpu_avx2
|
||||
#include "kernel/device/cpu/kernel_arch.h"
|
||||
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
/* Optimized CPU kernel entry points. This file is compiled with SSE42
|
||||
* optimization flags and nearly all functions inlined, while kernel.cpp
|
||||
* is compiled without for other CPU's. */
|
||||
|
||||
#include "util/optimization.h"
|
||||
|
||||
#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE42
|
||||
# define KERNEL_STUB
|
||||
#else
|
||||
/* SSE optimization disabled for now on 32 bit, see bug #36316. */
|
||||
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
|
||||
# define __KERNEL_SSE__
|
||||
# define __KERNEL_SSE2__
|
||||
# define __KERNEL_SSE3__
|
||||
# define __KERNEL_SSSE3__
|
||||
# define __KERNEL_SSE42__
|
||||
# endif
|
||||
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE42 */
|
||||
|
||||
#include "kernel/device/cpu/globals.h"
|
||||
#include "kernel/device/cpu/kernel.h"
|
||||
#define KERNEL_ARCH cpu_sse42
|
||||
#include "kernel/device/cpu/kernel_arch_impl.h"
|
||||
@@ -27,7 +27,6 @@ set(SRC
|
||||
time.cpp
|
||||
transform.cpp
|
||||
transform_avx2.cpp
|
||||
transform_sse42.cpp
|
||||
windows.cpp
|
||||
)
|
||||
|
||||
@@ -125,9 +124,6 @@ set(SRC_HEADERS
|
||||
xml.h
|
||||
)
|
||||
|
||||
if(CXX_HAS_SSE42)
|
||||
set_source_files_properties(transform_sse42.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE42_FLAGS}")
|
||||
endif()
|
||||
if(CXX_HAS_AVX2)
|
||||
set_source_files_properties(transform_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}")
|
||||
endif()
|
||||
|
||||
@@ -27,7 +27,6 @@ void DebugFlags::CPU::reset()
|
||||
} while (0)
|
||||
|
||||
CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2");
|
||||
CHECK_CPU_FLAGS(sse42, "CYCLES_CPU_NO_SSE42");
|
||||
|
||||
#undef STRINGIFY
|
||||
#undef CHECK_CPU_FLAGS
|
||||
|
||||
@@ -8,16 +8,10 @@
|
||||
|
||||
/* x86
|
||||
*
|
||||
* Compile a regular and SSE42 kernel. */
|
||||
* Compile a regular kernel. */
|
||||
|
||||
# if defined(i386) || defined(_M_IX86)
|
||||
|
||||
/* We require minimum SSE4.2 support on x86, so auto enable. */
|
||||
# define __KERNEL_SSE42__
|
||||
# ifdef WITH_KERNEL_SSE42
|
||||
# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE42
|
||||
# endif
|
||||
|
||||
/* x86-64
|
||||
*
|
||||
* Compile a regular (includes SSE4.2) and AVX2 kernel. */
|
||||
|
||||
@@ -408,7 +408,6 @@ ccl_device_inline float4 quat_interpolate(const float4 q1, const float4 q2, cons
|
||||
}
|
||||
|
||||
#ifndef __KERNEL_GPU__
|
||||
void transform_inverse_cpu_sse42(const Transform &tfm, Transform &itfm);
|
||||
void transform_inverse_cpu_avx2(const Transform &tfm, Transform &itfm);
|
||||
#endif
|
||||
|
||||
@@ -497,11 +496,6 @@ ccl_device_inline Transform transform_inverse(const Transform tfm)
|
||||
transform_inverse_cpu_avx2(tfm, itfm);
|
||||
return itfm;
|
||||
}
|
||||
if (system_cpu_support_sse42()) {
|
||||
Transform itfm;
|
||||
transform_inverse_cpu_sse42(tfm, itfm);
|
||||
return itfm;
|
||||
}
|
||||
#endif
|
||||
|
||||
return transform_inverse_impl(tfm);
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0 */
|
||||
|
||||
#include "util/transform.h"
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
void transform_inverse_cpu_sse42(const Transform &tfm, Transform &itfm)
|
||||
{
|
||||
itfm = transform_inverse_impl(tfm);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
Reference in New Issue
Block a user