Files
test2/intern/cycles/device/cpu/kernel_function.h
Brecht Van Lommel fd25e883e2 Cycles: remove prefix from source code file names
Remove prefix of filenames that is the same as the folder name. This used
to help when #includes were using individual files, but now they are always
relative to the cycles root directory and so the prefixes are redundant.

For patches and branches, git merge and rebase should be able to detect the
renames and move over code to the right file.
2021-10-26 15:37:04 +02:00

125 lines
3.6 KiB
C++

/*
* Copyright 2011-2021 Blender Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once
#include "util/debug.h"
#include "util/system.h"
CCL_NAMESPACE_BEGIN
/* A wrapper around per-microarchitecture variant of a kernel function.
*
* Provides a function-call-like API which gets routed to the most suitable implementation.
*
* For example, on a computer which only has SSE4.1 the kernel_sse41 will be used. */
template<typename FunctionType> class CPUKernelFunction {
public:
CPUKernelFunction(FunctionType kernel_default,
FunctionType kernel_sse2,
FunctionType kernel_sse3,
FunctionType kernel_sse41,
FunctionType kernel_avx,
FunctionType kernel_avx2)
{
kernel_info_ = get_best_kernel_info(
kernel_default, kernel_sse2, kernel_sse3, kernel_sse41, kernel_avx, kernel_avx2);
}
template<typename... Args> inline auto operator()(Args... args) const
{
assert(kernel_info_.kernel);
return kernel_info_.kernel(args...);
}
const char *get_uarch_name() const
{
return kernel_info_.uarch_name;
}
protected:
/* Helper class which allows to pass human-readable microarchitecture name together with function
* pointer. */
class KernelInfo {
public:
KernelInfo() : KernelInfo("", nullptr)
{
}
/* TODO(sergey): Use string view, to have higher-level functionality (i.e. comparison) without
* memory allocation. */
KernelInfo(const char *uarch_name, FunctionType kernel)
: uarch_name(uarch_name), kernel(kernel)
{
}
const char *uarch_name;
FunctionType kernel;
};
KernelInfo get_best_kernel_info(FunctionType kernel_default,
FunctionType kernel_sse2,
FunctionType kernel_sse3,
FunctionType kernel_sse41,
FunctionType kernel_avx,
FunctionType kernel_avx2)
{
/* Silence warnings about unused variables when compiling without some architectures. */
(void)kernel_sse2;
(void)kernel_sse3;
(void)kernel_sse41;
(void)kernel_avx;
(void)kernel_avx2;
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
if (DebugFlags().cpu.has_avx2() && system_cpu_support_avx2()) {
return KernelInfo("AVX2", kernel_avx2);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX
if (DebugFlags().cpu.has_avx() && system_cpu_support_avx()) {
return KernelInfo("AVX", kernel_avx);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE41
if (DebugFlags().cpu.has_sse41() && system_cpu_support_sse41()) {
return KernelInfo("SSE4.1", kernel_sse41);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE3
if (DebugFlags().cpu.has_sse3() && system_cpu_support_sse3()) {
return KernelInfo("SSE3", kernel_sse3);
}
#endif
#ifdef WITH_CYCLES_OPTIMIZED_KERNEL_SSE2
if (DebugFlags().cpu.has_sse2() && system_cpu_support_sse2()) {
return KernelInfo("SSE2", kernel_sse2);
}
#endif
return KernelInfo("default", kernel_default);
}
KernelInfo kernel_info_;
};
CCL_NAMESPACE_END