Cleanup: Cycles: Remove unnecessary SSE4.2 CPU kernel
SSE4.2 is already the minimum requirement, so the regular kernel already includes these instructions if supported by the CPU architecture.
This commit is contained in:
@@ -49,7 +49,6 @@ void device_cpu_info(vector<DeviceInfo> &devices)
|
|||||||
string device_cpu_capabilities()
|
string device_cpu_capabilities()
|
||||||
{
|
{
|
||||||
string capabilities;
|
string capabilities;
|
||||||
capabilities += system_cpu_support_sse42() ? "SSE42 " : "";
|
|
||||||
capabilities += system_cpu_support_avx2() ? "AVX2" : "";
|
capabilities += system_cpu_support_avx2() ? "AVX2" : "";
|
||||||
if (capabilities[capabilities.size() - 1] == ' ') {
|
if (capabilities[capabilities.size() - 1] == ' ') {
|
||||||
capabilities.resize(capabilities.size() - 1);
|
capabilities.resize(capabilities.size() - 1);
|
||||||
|
|||||||
@@ -8,8 +8,7 @@
|
|||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
CCL_NAMESPACE_BEGIN
|
||||||
|
|
||||||
#define KERNEL_FUNCTIONS(name) \
|
#define KERNEL_FUNCTIONS(name) KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_avx2, name)
|
||||||
KERNEL_NAME_EVAL(cpu, name), KERNEL_NAME_EVAL(cpu_sse42, name), KERNEL_NAME_EVAL(cpu_avx2, name)
|
|
||||||
|
|
||||||
#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
|
#define REGISTER_KERNEL(name) name(KERNEL_FUNCTIONS(name))
|
||||||
#define REGISTER_KERNEL_FILM_CONVERT(name) \
|
#define REGISTER_KERNEL_FILM_CONVERT(name) \
|
||||||
|
|||||||
@@ -13,14 +13,12 @@ CCL_NAMESPACE_BEGIN
|
|||||||
*
|
*
|
||||||
* Provides a function-call-like API which gets routed to the most suitable implementation.
|
* Provides a function-call-like API which gets routed to the most suitable implementation.
|
||||||
*
|
*
|
||||||
* For example, on a computer which only has SSE4.2 the kernel_sse42 will be used. */
|
* For example, on a computer which only has AVX2 the kernel_avx2 will be used. */
|
||||||
template<typename FunctionType> class CPUKernelFunction {
|
template<typename FunctionType> class CPUKernelFunction {
|
||||||
public:
|
public:
|
||||||
CPUKernelFunction(FunctionType kernel_default,
|
CPUKernelFunction(FunctionType kernel_default, FunctionType kernel_avx2)
|
||||||
FunctionType kernel_sse42,
|
|
||||||
FunctionType kernel_avx2)
|
|
||||||
{
|
{
|
||||||
kernel_info_ = get_best_kernel_info(kernel_default, kernel_sse42, kernel_avx2);
|
kernel_info_ = get_best_kernel_info(kernel_default, kernel_avx2);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename... Args> auto operator()(Args... args) const
|
template<typename... Args> auto operator()(Args... args) const
|
||||||
@@ -53,12 +51,9 @@ template<typename FunctionType> class CPUKernelFunction {
|
|||||||
FunctionType kernel;
|
FunctionType kernel;
|
||||||
};
|
};
|
||||||
|
|
||||||
KernelInfo get_best_kernel_info(FunctionType kernel_default,
|
KernelInfo get_best_kernel_info(FunctionType kernel_default, FunctionType kernel_avx2)
|
||||||
FunctionType kernel_sse42,
|
|
||||||
FunctionType kernel_avx2)
|
|
||||||
{
|
{
|
||||||
/* Silence warnings about unused variables when compiling without some architectures. */
|
/* Silence warnings about unused variables when compiling without some architectures. */
|
||||||
(void)kernel_sse42;
|
|
||||||
(void)kernel_avx2;
|
(void)kernel_avx2;
|
||||||
|
|
||||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
|
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
|
||||||
@@ -67,12 +62,6 @@ template<typename FunctionType> class CPUKernelFunction {
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE42
|
|
||||||
if (DebugFlags().cpu.has_sse42() && system_cpu_support_sse42()) {
|
|
||||||
return KernelInfo("SSE4.2", kernel_sse42);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return KernelInfo("default", kernel_default);
|
return KernelInfo("default", kernel_default);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ set(INC_SYS
|
|||||||
set(SRC_KERNEL_DEVICE_CPU
|
set(SRC_KERNEL_DEVICE_CPU
|
||||||
device/cpu/globals.cpp
|
device/cpu/globals.cpp
|
||||||
device/cpu/kernel.cpp
|
device/cpu/kernel.cpp
|
||||||
device/cpu/kernel_sse42.cpp
|
|
||||||
device/cpu/kernel_avx2.cpp
|
device/cpu/kernel_avx2.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1348,10 +1347,6 @@ if(DEFINED CYCLES_KERNEL_FLAGS)
|
|||||||
set_source_files_properties(device/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
|
set_source_files_properties(device/cpu/kernel.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_KERNEL_FLAGS}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(CXX_HAS_SSE42)
|
|
||||||
set_source_files_properties(device/cpu/kernel_sse42.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE42_FLAGS}")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if(CXX_HAS_AVX2)
|
if(CXX_HAS_AVX2)
|
||||||
set_source_files_properties(device/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}")
|
set_source_files_properties(device/cpu/kernel_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}")
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -35,9 +35,6 @@ void kernel_global_memory_copy(KernelGlobalsCPU *kg,
|
|||||||
#define KERNEL_ARCH cpu
|
#define KERNEL_ARCH cpu
|
||||||
#include "kernel/device/cpu/kernel_arch.h"
|
#include "kernel/device/cpu/kernel_arch.h"
|
||||||
|
|
||||||
#define KERNEL_ARCH cpu_sse42
|
|
||||||
#include "kernel/device/cpu/kernel_arch.h"
|
|
||||||
|
|
||||||
#define KERNEL_ARCH cpu_avx2
|
#define KERNEL_ARCH cpu_avx2
|
||||||
#include "kernel/device/cpu/kernel_arch.h"
|
#include "kernel/device/cpu/kernel_arch.h"
|
||||||
|
|
||||||
|
|||||||
@@ -1,27 +0,0 @@
|
|||||||
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0 */
|
|
||||||
|
|
||||||
/* Optimized CPU kernel entry points. This file is compiled with SSE42
|
|
||||||
* optimization flags and nearly all functions inlined, while kernel.cpp
|
|
||||||
* is compiled without for other CPU's. */
|
|
||||||
|
|
||||||
#include "util/optimization.h"
|
|
||||||
|
|
||||||
#ifndef WITH_CYCLES_OPTIMIZED_KERNEL_SSE42
|
|
||||||
# define KERNEL_STUB
|
|
||||||
#else
|
|
||||||
/* SSE optimization disabled for now on 32 bit, see bug #36316. */
|
|
||||||
# if !(defined(__GNUC__) && (defined(i386) || defined(_M_IX86)))
|
|
||||||
# define __KERNEL_SSE__
|
|
||||||
# define __KERNEL_SSE2__
|
|
||||||
# define __KERNEL_SSE3__
|
|
||||||
# define __KERNEL_SSSE3__
|
|
||||||
# define __KERNEL_SSE42__
|
|
||||||
# endif
|
|
||||||
#endif /* WITH_CYCLES_OPTIMIZED_KERNEL_SSE42 */
|
|
||||||
|
|
||||||
#include "kernel/device/cpu/globals.h"
|
|
||||||
#include "kernel/device/cpu/kernel.h"
|
|
||||||
#define KERNEL_ARCH cpu_sse42
|
|
||||||
#include "kernel/device/cpu/kernel_arch_impl.h"
|
|
||||||
@@ -27,7 +27,6 @@ set(SRC
|
|||||||
time.cpp
|
time.cpp
|
||||||
transform.cpp
|
transform.cpp
|
||||||
transform_avx2.cpp
|
transform_avx2.cpp
|
||||||
transform_sse42.cpp
|
|
||||||
windows.cpp
|
windows.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -125,9 +124,6 @@ set(SRC_HEADERS
|
|||||||
xml.h
|
xml.h
|
||||||
)
|
)
|
||||||
|
|
||||||
if(CXX_HAS_SSE42)
|
|
||||||
set_source_files_properties(transform_sse42.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_SSE42_FLAGS}")
|
|
||||||
endif()
|
|
||||||
if(CXX_HAS_AVX2)
|
if(CXX_HAS_AVX2)
|
||||||
set_source_files_properties(transform_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}")
|
set_source_files_properties(transform_avx2.cpp PROPERTIES COMPILE_FLAGS "${CYCLES_AVX2_FLAGS}")
|
||||||
endif()
|
endif()
|
||||||
|
|||||||
@@ -27,7 +27,6 @@ void DebugFlags::CPU::reset()
|
|||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2");
|
CHECK_CPU_FLAGS(avx2, "CYCLES_CPU_NO_AVX2");
|
||||||
CHECK_CPU_FLAGS(sse42, "CYCLES_CPU_NO_SSE42");
|
|
||||||
|
|
||||||
#undef STRINGIFY
|
#undef STRINGIFY
|
||||||
#undef CHECK_CPU_FLAGS
|
#undef CHECK_CPU_FLAGS
|
||||||
|
|||||||
@@ -8,16 +8,10 @@
|
|||||||
|
|
||||||
/* x86
|
/* x86
|
||||||
*
|
*
|
||||||
* Compile a regular and SSE42 kernel. */
|
* Compile a regular kernel. */
|
||||||
|
|
||||||
# if defined(i386) || defined(_M_IX86)
|
# if defined(i386) || defined(_M_IX86)
|
||||||
|
|
||||||
/* We require minimum SSE4.2 support on x86, so auto enable. */
|
|
||||||
# define __KERNEL_SSE42__
|
|
||||||
# ifdef WITH_KERNEL_SSE42
|
|
||||||
# define WITH_CYCLES_OPTIMIZED_KERNEL_SSE42
|
|
||||||
# endif
|
|
||||||
|
|
||||||
/* x86-64
|
/* x86-64
|
||||||
*
|
*
|
||||||
* Compile a regular (includes SSE4.2) and AVX2 kernel. */
|
* Compile a regular (includes SSE4.2) and AVX2 kernel. */
|
||||||
|
|||||||
@@ -408,7 +408,6 @@ ccl_device_inline float4 quat_interpolate(const float4 q1, const float4 q2, cons
|
|||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __KERNEL_GPU__
|
#ifndef __KERNEL_GPU__
|
||||||
void transform_inverse_cpu_sse42(const Transform &tfm, Transform &itfm);
|
|
||||||
void transform_inverse_cpu_avx2(const Transform &tfm, Transform &itfm);
|
void transform_inverse_cpu_avx2(const Transform &tfm, Transform &itfm);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -497,11 +496,6 @@ ccl_device_inline Transform transform_inverse(const Transform tfm)
|
|||||||
transform_inverse_cpu_avx2(tfm, itfm);
|
transform_inverse_cpu_avx2(tfm, itfm);
|
||||||
return itfm;
|
return itfm;
|
||||||
}
|
}
|
||||||
if (system_cpu_support_sse42()) {
|
|
||||||
Transform itfm;
|
|
||||||
transform_inverse_cpu_sse42(tfm, itfm);
|
|
||||||
return itfm;
|
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return transform_inverse_impl(tfm);
|
return transform_inverse_impl(tfm);
|
||||||
|
|||||||
@@ -1,14 +0,0 @@
|
|||||||
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0 */
|
|
||||||
|
|
||||||
#include "util/transform.h"
|
|
||||||
|
|
||||||
CCL_NAMESPACE_BEGIN
|
|
||||||
|
|
||||||
void transform_inverse_cpu_sse42(const Transform &tfm, Transform &itfm)
|
|
||||||
{
|
|
||||||
itfm = transform_inverse_impl(tfm);
|
|
||||||
}
|
|
||||||
|
|
||||||
CCL_NAMESPACE_END
|
|
||||||
Reference in New Issue
Block a user