The 2D->2D, 3D->3D and 4D->4D hashes used by the Voronoi node relied on a fairly expensive hash function. Switch them to dedicated 2D/3D/4D hash functions (pcg2d, pcg3d, pcg4d): these are still of very good quality, but the hash itself is 3x-4x faster, which makes the Voronoi node calculation around 2x faster overall. In some cases when using OSL, the speedup is even larger. However, this visibly changes the output of the Voronoi noise. The noise "behaves" the same, but anyone depending on the noise pattern being exactly as it was before will see a different pattern. Images, more performance results and details regarding OSL are in the PR. Pull Request: https://projects.blender.org/blender/blender/pulls/139520
103 lines
2.4 KiB
C
103 lines
2.4 KiB
C
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
|
|
|
#pragma once
|
|
|
|
#include "util/types_int3.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
#if !defined(__KERNEL_METAL__)
|
|
/* Component-wise minimum of two int3 vectors. */
ccl_device_inline int3 min(const int3 a, const int3 b)
{
#  if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
  /* Single packed 32-bit integer minimum over the whole register. */
  return int3(_mm_min_epi32(a.m128, b.m128));
#  else
  /* Scalar fallback: take the smaller value per component. */
  const int lo_x = min(a.x, b.x);
  const int lo_y = min(a.y, b.y);
  const int lo_z = min(a.z, b.z);
  return make_int3(lo_x, lo_y, lo_z);
#  endif
}
|
|
|
|
/* Component-wise maximum of two int3 vectors. */
ccl_device_inline int3 max(const int3 a, const int3 b)
{
#  if defined(__KERNEL_SSE__) && defined(__KERNEL_SSE42__)
  /* Single packed 32-bit integer maximum over the whole register. */
  return int3(_mm_max_epi32(a.m128, b.m128));
#  else
  /* Scalar fallback: take the larger value per component. */
  const int hi_x = max(a.x, b.x);
  const int hi_y = max(a.y, b.y);
  const int hi_z = max(a.z, b.z);
  return make_int3(hi_x, hi_y, hi_z);
#  endif
}
|
|
|
|
/* Clamp every component of `a` against the scalar bounds `mn` (lower) and `mx` (upper). */
ccl_device_inline int3 clamp(const int3 a, const int mn, const int mx)
{
#  ifdef __KERNEL_SSE__
  /* Broadcast the scalar bounds, then apply the lower bound followed by the upper bound. */
  const int3 lower = make_int3(mn);
  const int3 raised = max(a, lower);
  return min(raised, make_int3(mx));
#  else
  /* Scalar fallback: clamp each component independently. */
  const int cx = clamp(a.x, mn, mx);
  const int cy = clamp(a.y, mn, mx);
  const int cz = clamp(a.z, mn, mx);
  return make_int3(cx, cy, cz);
#  endif
}
|
|
|
|
/* Clamp every component of `a` against a per-component lower bound `mn` and a shared scalar
 * upper bound `mx`.
 *
 * `mn` is only read, so it is taken by const value like every other int3 parameter in this
 * file. The previous non-const reference rejected const and temporary lower bounds. */
ccl_device_inline int3 clamp(const int3 a, const int3 mn, const int mx)
{
#  ifdef __KERNEL_SSE__
  /* Apply the vector lower bound first, then the broadcast upper bound. */
  return min(max(a, mn), make_int3(mx));
#  else
  /* Scalar fallback: each component uses its own lower bound and the common upper bound. */
  return make_int3(clamp(a.x, mn.x, mx), clamp(a.y, mn.y, mx), clamp(a.z, mn.z, mx));
#  endif
}
|
|
|
|
/* Equality: true only when all three components match. */
ccl_device_inline bool operator==(const int3 a, const int3 b)
{
  if (a.x != b.x) {
    return false;
  }
  if (a.y != b.y) {
    return false;
  }
  return a.z == b.z;
}
|
|
|
|
/* Inequality: true as soon as any component differs. */
ccl_device_inline bool operator!=(const int3 a, const int3 b)
{
  return a.x != b.x || a.y != b.y || a.z != b.z;
}
|
|
|
|
/* Strict "less than" on all components: true only when every component of `a` is smaller
 * than the matching component of `b`. This is not a lexicographic ordering. */
ccl_device_inline bool operator<(const int3 a, const int3 b)
{
  if (!(a.x < b.x)) {
    return false;
  }
  if (!(a.y < b.y)) {
    return false;
  }
  return a.z < b.z;
}
|
|
|
|
/* Component-wise addition. */
ccl_device_inline int3 operator+(const int3 a, const int3 b)
{
#  ifdef __KERNEL_SSE__
  /* Packed 32-bit integer add over the whole register. */
  return int3(_mm_add_epi32(a.m128, b.m128));
#  else
  const int sum_x = a.x + b.x;
  const int sum_y = a.y + b.y;
  const int sum_z = a.z + b.z;
  return make_int3(sum_x, sum_y, sum_z);
#  endif
}
|
|
|
|
/* Component-wise subtraction (`a` minus `b`). */
ccl_device_inline int3 operator-(const int3 a, const int3 b)
{
#  ifdef __KERNEL_SSE__
  /* Packed 32-bit integer subtract over the whole register. */
  return int3(_mm_sub_epi32(a.m128, b.m128));
#  else
  const int diff_x = a.x - b.x;
  const int diff_y = a.y - b.y;
  const int diff_z = a.z - b.z;
  return make_int3(diff_x, diff_y, diff_z);
#  endif
}
|
|
|
|
/* Shift every component right by the same bit count `i`. */
ccl_device_inline int3 operator>>(const int3 a, const int i)
{
  const int shifted_x = a.x >> i;
  const int shifted_y = a.y >> i;
  const int shifted_z = a.z >> i;
  return make_int3(shifted_x, shifted_y, shifted_z);
}
|
|
|
|
/* Component-wise multiplication. */
ccl_device_inline int3 operator*(const int3 a, const int3 b)
{
  const int prod_x = a.x * b.x;
  const int prod_y = a.y * b.y;
  const int prod_z = a.z * b.z;
  return make_int3(prod_x, prod_y, prod_z);
}
|
|
|
|
/* Component-wise bitwise XOR. */
ccl_device_inline int3 operator^(const int3 a, const int3 b)
{
  const int xor_x = a.x ^ b.x;
  const int xor_y = a.y ^ b.y;
  const int xor_z = a.z ^ b.z;
  return make_int3(xor_x, xor_y, xor_z);
}
|
|
|
|
/* Component-wise bitwise AND. */
ccl_device_inline int3 operator&(const int3 a, const int3 b)
{
  const int and_x = a.x & b.x;
  const int and_y = a.y & b.y;
  const int and_z = a.z & b.z;
  return make_int3(and_x, and_y, and_z);
}
|
|
#endif /* !__KERNEL_METAL__ */
|
|
|
|
CCL_NAMESPACE_END
|