125 lines
3.6 KiB
C
125 lines
3.6 KiB
C
|
|
/*
 * Copyright 2011-2021 Blender Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
||
|
|
|
||
|
|
#pragma once
|
||
|
|
|
||
|
|
#include "util/util_debug.h"
|
||
|
|
#include "util/util_system.h"
|
||
|
|
|
||
|
|
CCL_NAMESPACE_BEGIN
|
||
|
|
|
||
|
|
/* A wrapper around per-microarchitecture variants of a kernel function.
 *
 * Provides a function-call-like API which gets routed to the most suitable implementation.
 * The implementation is chosen once, at construction time, and then reused for every call.
 *
 * For example, on a computer which only has SSE4.1 the kernel_sse41 will be used.
 *
 * FunctionType is expected to be callable and constructible from nullptr (the default
 * KernelInfo stores a null kernel), which in practice means a function pointer type. */
template<typename FunctionType> class CPUKernelFunction {
 public:
  /* Select one of the given kernel variants and remember it for all subsequent calls.
   *
   * One kernel must be provided per supported microarchitecture; variants which are not compiled
   * in (see the WITH_CYCLES_OPTIMIZED_KERNEL_* checks in get_best_kernel_info()) are ignored. */
  CPUKernelFunction(FunctionType kernel_default,
                    FunctionType kernel_sse2,
                    FunctionType kernel_sse3,
                    FunctionType kernel_sse41,
                    FunctionType kernel_avx,
                    FunctionType kernel_avx2)
      : kernel_info_(get_best_kernel_info(
            kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2))
  {
  }

  /* Invoke the selected kernel with the given arguments and return whatever it returns.
   *
   * Arguments are taken by value and passed on as-is. Calling with a null kernel is a programming
   * error which is only caught by the assert (i.e. not when assertions are compiled out). */
  template<typename... Args> inline auto operator()(Args... args) const
  {
    assert(kernel_info_.kernel);

    return kernel_info_.kernel(args...);
  }

  /* Human-readable name of the microarchitecture whose kernel was selected ("AVX2", "AVX",
   * "SSE4.1", "SSE3", "SSE2" or "default"). */
  const char *get_uarch_name() const
  {
    return kernel_info_.uarch_name;
  }

 protected:
  /* Helper class which allows to pass human-readable microarchitecture name together with function
   * pointer. */
  class KernelInfo {
   public:
    /* Empty name and null kernel. */
    KernelInfo() = default;

    /* TODO(sergey): Use string view, to have higher-level functionality (i.e. comparison) without
     * memory allocation. */
    /* NOTE: The name is stored as a pointer and is not copied, so it has to outlive this object.
     * All names passed within this class are string literals. */
    KernelInfo(const char *uarch_name, FunctionType kernel)
        : uarch_name(uarch_name), kernel(kernel)
    {
    }

    const char *uarch_name = "";
    FunctionType kernel = nullptr;
  };

  /* Return the kernel (together with its microarchitecture name) to be used on this machine.
   *
   * Candidates are considered from the most to the least capable instruction set: AVX2, AVX,
   * SSE4.1, SSE3, SSE2. A candidate is used when it is compiled in
   * (WITH_CYCLES_OPTIMIZED_KERNEL_*), enabled in DebugFlags().cpu and reported as supported by
   * the corresponding system_cpu_support_*() check. When no candidate qualifies the default
   * kernel is returned.
   *
   * Does not access any instance state, hence static. */
  static KernelInfo get_best_kernel_info(FunctionType kernel_default,
                                         FunctionType kernel_sse2,
                                         FunctionType kernel_sse3,
                                         FunctionType kernel_sse41,
                                         FunctionType kernel_avx,
                                         FunctionType kernel_avx2)
  {
    /* Silence warnings about unused variables when compiling without some architectures. */
    (void)kernel_sse2;
    (void)kernel_sse3;
    (void)kernel_sse41;
    (void)kernel_avx;
    (void)kernel_avx2;

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
    if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
      return KernelInfo("AVX2", kernel_avx2);
    }
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
    if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
      return KernelInfo("AVX", kernel_avx);
    }
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
    if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
      return KernelInfo("SSE4.1", kernel_sse41);
    }
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
    if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
      return KernelInfo("SSE3", kernel_sse3);
    }
#endif

#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
    if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
      return KernelInfo("SSE2", kernel_sse2);
    }
#endif

    return KernelInfo("default", kernel_default);
  }

  /* Kernel chosen at construction time, together with its microarchitecture name. */
  KernelInfo kernel_info_;
};
|
||
|
|
|
||
|
|
CCL_NAMESPACE_END
|