diff --git a/intern/cycles/app/cycles_precompute.cpp b/intern/cycles/app/cycles_precompute.cpp index ccfd3ab96b4..8671376368d 100644 --- a/intern/cycles/app/cycles_precompute.cpp +++ b/intern/cycles/app/cycles_precompute.cpp @@ -16,7 +16,7 @@ #include "kernel/sample/lcg.h" #include "kernel/sample/mapping.h" -#include "kernel/util/color.h" +#include "kernel/util/colorspace.h" #include "kernel/closure/bsdf_microfacet.h" diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 2816064303b..a1a03f3ad0f 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -352,7 +352,7 @@ set(SRC_KERNEL_SAMPLE_HEADERS ) set(SRC_KERNEL_UTIL_HEADERS - util/color.h + util/colorspace.h util/differential.h util/ies.h util/lookup_table.h @@ -390,6 +390,7 @@ set(SRC_UTIL_HEADERS ../util/half.h ../util/hash.h ../util/math.h + ../util/math_base.h ../util/math_fast.h ../util/math_intersect.h ../util/math_float2.h @@ -409,35 +410,22 @@ set(SRC_UTIL_HEADERS ../util/transform_inverse.h ../util/texture.h ../util/types.h + ../util/types_base.h ../util/types_float2.h - ../util/types_float2_impl.h ../util/types_float3.h - ../util/types_float3_impl.h ../util/types_float4.h - ../util/types_float4_impl.h ../util/types_float8.h - ../util/types_float8_impl.h ../util/types_int2.h - ../util/types_int2_impl.h ../util/types_int3.h - ../util/types_int3_impl.h ../util/types_int4.h - ../util/types_int4_impl.h ../util/types_int8.h - ../util/types_int8_impl.h ../util/types_spectrum.h ../util/types_uchar2.h - ../util/types_uchar2_impl.h ../util/types_uchar3.h - ../util/types_uchar3_impl.h ../util/types_uchar4.h - ../util/types_uchar4_impl.h ../util/types_uint2.h - ../util/types_uint2_impl.h ../util/types_uint3.h - ../util/types_uint3_impl.h ../util/types_uint4.h - ../util/types_uint4_impl.h ../util/types_ushort4.h ) diff --git a/intern/cycles/kernel/bake/bake.h b/intern/cycles/kernel/bake/bake.h index d9123a3ff44..03c4726ff00 100644 --- a/intern/cycles/kernel/bake/bake.h +++ b/intern/cycles/kernel/bake/bake.h @@ -10,7 +10,7 @@ #include "kernel/geom/geom.h" -#include "kernel/util/color.h" +#include "kernel/util/colorspace.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h index 7a2373f037f..ed77cf5f29c 100644 --- a/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_diffuse_ramp.h @@ -8,7 +8,7 @@ #pragma once #include "kernel/sample/mapping.h" -#include "kernel/util/color.h" +#include "kernel/util/colorspace.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_phong_ramp.h b/intern/cycles/kernel/closure/bsdf_phong_ramp.h index 9863d939e7a..8c48816f64d 100644 --- a/intern/cycles/kernel/closure/bsdf_phong_ramp.h +++ b/intern/cycles/kernel/closure/bsdf_phong_ramp.h @@ -7,7 +7,7 @@ #pragma once -#include "kernel/util/color.h" +#include "kernel/util/colorspace.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_principled_hair_chiang.h b/intern/cycles/kernel/closure/bsdf_principled_hair_chiang.h index 94612b8a931..57de81d75c3 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_hair_chiang.h +++ b/intern/cycles/kernel/closure/bsdf_principled_hair_chiang.h @@ -11,7 +11,7 @@ # include #endif -#include "kernel/util/color.h" +#include "kernel/util/colorspace.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_principled_hair_huang.h b/intern/cycles/kernel/closure/bsdf_principled_hair_huang.h index 81e1a6e50dc..b892cbacf96 100644 --- a/intern/cycles/kernel/closure/bsdf_principled_hair_huang.h +++ b/intern/cycles/kernel/closure/bsdf_principled_hair_huang.h @@ -10,7 +10,7 @@ #include "kernel/closure/bsdf_util.h" #include "kernel/sample/lcg.h" -#include "kernel/util/color.h" +#include "kernel/util/colorspace.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/closure/bsdf_util.h b/intern/cycles/kernel/closure/bsdf_util.h index b05be74b928..c6cedea83d3 100644 --- a/intern/cycles/kernel/closure/bsdf_util.h +++ b/intern/cycles/kernel/closure/bsdf_util.h @@ -7,6 +7,8 @@ #pragma once +#include "util/color.h" + CCL_NAMESPACE_BEGIN /* Compute fresnel reflectance for perpendicular (aka S-) and parallel (aka P-) polarized light. diff --git a/intern/cycles/kernel/film/read.h b/intern/cycles/kernel/film/read.h index 0ed00220fd2..1052d38c410 100644 --- a/intern/cycles/kernel/film/read.h +++ b/intern/cycles/kernel/film/read.h @@ -8,6 +8,8 @@ #pragma once +#include "util/color.h" + CCL_NAMESPACE_BEGIN /* -------------------------------------------------------------------- diff --git a/intern/cycles/kernel/film/write.h b/intern/cycles/kernel/film/write.h index 43840da56be..42d79453503 100644 --- a/intern/cycles/kernel/film/write.h +++ b/intern/cycles/kernel/film/write.h @@ -4,7 +4,7 @@ #pragma once -#include "kernel/util/color.h" +#include "kernel/util/colorspace.h" #ifdef __KERNEL_GPU__ # define __ATOMIC_PASS_WRITE__ diff --git a/intern/cycles/kernel/integrator/guiding.h b/intern/cycles/kernel/integrator/guiding.h index 9478947c990..56b0c168b4e 100644 --- a/intern/cycles/kernel/integrator/guiding.h +++ b/intern/cycles/kernel/integrator/guiding.h @@ -8,6 +8,8 @@ #include "kernel/closure/bsdf.h" #include "kernel/film/write.h" +#include "util/color.h" + CCL_NAMESPACE_BEGIN /* Utilities. */ diff --git a/intern/cycles/kernel/integrator/shade_volume.h b/intern/cycles/kernel/integrator/shade_volume.h index 958f73046b8..e305017ee61 100644 --- a/intern/cycles/kernel/integrator/shade_volume.h +++ b/intern/cycles/kernel/integrator/shade_volume.h @@ -18,6 +18,8 @@ #include "kernel/light/light.h" #include "kernel/light/sample.h" +#include "util/color.h" + CCL_NAMESPACE_BEGIN #ifdef __VOLUME__ diff --git a/intern/cycles/kernel/integrator/subsurface_random_walk.h b/intern/cycles/kernel/integrator/subsurface_random_walk.h index 40b1175fb0e..e03ddddb284 100644 --- a/intern/cycles/kernel/integrator/subsurface_random_walk.h +++ b/intern/cycles/kernel/integrator/subsurface_random_walk.h @@ -8,6 +8,8 @@ #include "kernel/integrator/guiding.h" +#include "util/color.h" + CCL_NAMESPACE_BEGIN #ifdef __SUBSURFACE__ diff --git a/intern/cycles/kernel/osl/services.cpp b/intern/cycles/kernel/osl/services.cpp index 4d922b6c251..ebeddec36c3 100644 --- a/intern/cycles/kernel/osl/services.cpp +++ b/intern/cycles/kernel/osl/services.cpp @@ -45,8 +45,6 @@ #include "kernel/svm/svm.h" -#include "kernel/util/color.h" - CCL_NAMESPACE_BEGIN /* RenderServices implementation */ diff --git a/intern/cycles/kernel/osl/services_gpu.h b/intern/cycles/kernel/osl/services_gpu.h index ee25fe2e06a..204444dcf97 100644 --- a/intern/cycles/kernel/osl/services_gpu.h +++ b/intern/cycles/kernel/osl/services_gpu.h @@ -255,7 +255,7 @@ ccl_device_extern ccl_private OSLClosure *osl_allocate_weighted_closure_componen /* Utilities */ #include "kernel/svm/math_util.h" -#include "kernel/util/color.h" +#include "kernel/util/colorspace.h" ccl_device_extern void osl_error(ccl_private ShaderGlobals *sg, const char *format, void *args) {} diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h index a659e2cc1be..18f5f9706ad 100644 --- a/intern/cycles/kernel/svm/closure.h +++ b/intern/cycles/kernel/svm/closure.h @@ -9,7 +9,7 @@ #include "kernel/closure/bsdf_util.h" #include "kernel/closure/emissive.h" -#include "kernel/util/color.h" +#include "kernel/util/colorspace.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/kernel/util/color.h b/intern/cycles/kernel/util/colorspace.h similarity index 98% rename from intern/cycles/kernel/util/color.h rename to intern/cycles/kernel/util/colorspace.h index 7c8affc13b7..0221552be24 100644 --- a/intern/cycles/kernel/util/color.h +++ b/intern/cycles/kernel/util/colorspace.h @@ -4,8 +4,6 @@ #pragma once -#include "util/color.h" - CCL_NAMESPACE_BEGIN ccl_device float3 xyz_to_rgb(KernelGlobals kg, float3 xyz) diff --git a/intern/cycles/scene/shader_graph.h b/intern/cycles/scene/shader_graph.h index c3f9d3eec15..28b00349e0b 100644 --- a/intern/cycles/scene/shader_graph.h +++ b/intern/cycles/scene/shader_graph.h @@ -173,7 +173,7 @@ class ShaderNode : public Node { /* Simplify settings used by artists to the ones which are simpler to * evaluate in the kernel but keep the final result unchanged. */ - virtual void simplify_settings(Scene * /*scene*/) {}; + virtual void simplify_settings(Scene * /*scene*/){}; virtual bool has_surface_emission() { diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index c7288b078f9..9fa9611bc3b 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -43,6 +43,7 @@ set(SRC_HEADERS array.h atomic.h boundbox.h + color.h debug.h defines.h deque.h @@ -60,6 +61,7 @@ set(SRC_HEADERS log.h map.h math.h + math_base.h math_cdf.h math_fast.h math_intersect.h @@ -101,35 +103,22 @@ set(SRC_HEADERS transform.h transform_inverse.h types.h + types_base.h types_float2.h - types_float2_impl.h types_float3.h - types_float3_impl.h types_float4.h - types_float4_impl.h types_float8.h - types_float8_impl.h types_int2.h - types_int2_impl.h types_int3.h - types_int3_impl.h types_int4.h - types_int4_impl.h types_int8.h - types_int8_impl.h types_spectrum.h types_uchar2.h - types_uchar2_impl.h types_uchar3.h - types_uchar3_impl.h types_uchar4.h - types_uchar4_impl.h types_uint2.h - types_uint2_impl.h types_uint3.h - types_uint3_impl.h types_uint4.h - types_uint4_impl.h types_ushort4.h unique_ptr.h vector.h diff --git a/intern/cycles/util/algorithm.h b/intern/cycles/util/algorithm.h index 9124850268e..6bda4aa1bdc 100644 --- a/intern/cycles/util/algorithm.h +++ b/intern/cycles/util/algorithm.h @@ -12,5 +12,6 @@ using std::remove; using std::sort; using std::stable_sort; using std::swap; +using std::upper_bound; CCL_NAMESPACE_END diff --git a/intern/cycles/util/color.h b/intern/cycles/util/color.h index d43b2a5e5bb..ce121f8c472 100644 --- a/intern/cycles/util/color.h +++ b/intern/cycles/util/color.h @@ -2,8 +2,7 @@ * * SPDX-License-Identifier: Apache-2.0 */ -#ifndef __UTIL_COLOR_H__ -#define __UTIL_COLOR_H__ +#pragma once #include "util/math.h" #include "util/types.h" @@ -351,6 +350,67 @@ ccl_device float3 color_highlight_uncompress(float3 color) return exp(color) - one_float3(); } -CCL_NAMESPACE_END +/* Color division */ -#endif /* __UTIL_COLOR_H__ */ +ccl_device_inline Spectrum safe_invert_color(Spectrum a) +{ + FOREACH_SPECTRUM_CHANNEL (i) { + GET_SPECTRUM_CHANNEL(a, i) = (GET_SPECTRUM_CHANNEL(a, i) != 0.0f) ? + 1.0f / GET_SPECTRUM_CHANNEL(a, i) : + 0.0f; + } + + return a; +} + +/* Returns `a/b`, and replace the channel value with `fallback` if `b == 0`. */ +ccl_device_inline Spectrum safe_divide_color(Spectrum a, Spectrum b, const float fallback = 0.0f) +{ + FOREACH_SPECTRUM_CHANNEL (i) { + GET_SPECTRUM_CHANNEL(a, i) = (GET_SPECTRUM_CHANNEL(b, i) != 0.0f) ? + GET_SPECTRUM_CHANNEL(a, i) / GET_SPECTRUM_CHANNEL(b, i) : + fallback; + } + + return a; +} + +ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b) +{ + float x, y, z; + + x = (b.x != 0.0f) ? a.x / b.x : 0.0f; + y = (b.y != 0.0f) ? a.y / b.y : 0.0f; + z = (b.z != 0.0f) ? a.z / b.z : 0.0f; + + /* try to get gray even if b is zero */ + if (b.x == 0.0f) { + if (b.y == 0.0f) { + x = z; + y = z; + } + else if (b.z == 0.0f) { + x = y; + z = y; + } + else { + x = 0.5f * (y + z); + } + } + else if (b.y == 0.0f) { + if (b.z == 0.0f) { + y = x; + z = x; + } + else { + y = 0.5f * (x + z); + } + } + else if (b.z == 0.0f) { + z = 0.5f * (x + y); + } + + return make_float3(x, y, z); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h index 8c83fa9f6a0..999ed80abd6 100644 --- a/intern/cycles/util/math.h +++ b/intern/cycles/util/math.h @@ -2,1089 +2,21 @@ * * SPDX-License-Identifier: Apache-2.0 */ -#ifndef __UTIL_MATH_H__ -#define __UTIL_MATH_H__ +#pragma once -/* Math - * - * Basic math functions on scalar and vector types. This header is used by - * both the kernel code when compiled as C++, and other C++ non-kernel code. */ +#include "util/types.h" // IWYU pragma: export -#ifndef __KERNEL_GPU__ -# include -#endif +#include "util/math_base.h" // IWYU pragma: export -#ifdef __HIP__ -# include -#endif +#include "util/math_int2.h" // IWYU pragma: export +#include "util/math_int3.h" // IWYU pragma: export +#include "util/math_int4.h" // IWYU pragma: export +#include "util/math_int8.h" // IWYU pragma: export -#if !defined(__KERNEL_METAL__) -# include -# include -# include -#endif /* !defined(__KERNEL_METAL__) */ +#include "util/math_float2.h" // IWYU pragma: export +#include "util/math_float4.h" // IWYU pragma: export +#include "util/math_float8.h" // IWYU pragma: export -#include "util/types.h" +#include "util/math_float3.h" // IWYU pragma: export -CCL_NAMESPACE_BEGIN - -/* Float Pi variations */ - -/* Division */ -#ifndef M_PI_F -# define M_PI_F (3.1415926535897932f) /* pi */ -#endif -#ifndef M_PI_2_F -# define M_PI_2_F (1.5707963267948966f) /* pi/2 */ -#endif -#ifndef M_PI_4_F -# define M_PI_4_F (0.7853981633974830f) /* pi/4 */ -#endif -#ifndef M_1_PI_F -# define M_1_PI_F (0.3183098861837067f) /* 1/pi */ -#endif -#ifndef M_2_PI_F -# define M_2_PI_F (0.6366197723675813f) /* 2/pi */ -#endif -#ifndef M_1_2PI_F -# define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */ -#endif -#ifndef M_1_4PI_F -# define M_1_4PI_F (0.0795774715459476f) /* 1/(4*pi) */ -#endif -#ifndef M_SQRT_PI_8_F -# define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */ -#endif -#ifndef M_LN_2PI_F -# define M_LN_2PI_F (1.8378770664093454f) /* ln(2*pi) */ -#endif - -/* Multiplication */ -#ifndef M_2PI_F -# define M_2PI_F (6.2831853071795864f) /* 2*pi */ -#endif -#ifndef M_4PI_F -# define M_4PI_F (12.566370614359172f) /* 4*pi */ -#endif -#ifndef M_PI_4F -# define M_PI_4F 0.78539816339744830962f /* pi/4 */ -#endif - -/* Float sqrt variations */ -#ifndef M_SQRT2_F -# define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ -#endif -#ifndef M_CBRT2_F -# define M_CBRT2_F 1.2599210498948732f /* cbrt(2) */ -#endif -#ifndef M_SQRT1_2F -# define M_SQRT1_2F 0.70710678118654752440f /* sqrt(1/2) */ -#endif -#ifndef M_SQRT3_F -# define M_SQRT3_F (1.7320508075688772f) /* sqrt(3) */ -#endif -#ifndef M_LN2_F -# define M_LN2_F (0.6931471805599453f) /* ln(2) */ -#endif -#ifndef M_LN10_F -# define M_LN10_F (2.3025850929940457f) /* ln(10) */ -#endif - -/* Scalar */ - -#if !defined(__HIP__) && !defined(__KERNEL_ONEAPI__) -# ifdef _WIN32 -ccl_device_inline float fmaxf(float a, float b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline float fminf(float a, float b) -{ - return (a < b) ? a : b; -} - -# endif /* _WIN32 */ -#endif /* __HIP__, __KERNEL_ONEAPI__ */ - -#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) -# ifndef __KERNEL_ONEAPI__ -using std::isfinite; -using std::isnan; -using std::sqrt; -# else -# define isfinite(x) sycl::isfinite((x)) -# define isnan(x) sycl::isnan((x)) -# endif - -ccl_device_inline int abs(int x) -{ - return (x > 0) ? x : -x; -} - -ccl_device_inline int max(int a, int b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline int min(int a, int b) -{ - return (a < b) ? a : b; -} - -ccl_device_inline uint32_t max(uint32_t a, uint32_t b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline uint32_t min(uint32_t a, uint32_t b) -{ - return (a < b) ? a : b; -} - -ccl_device_inline uint64_t max(uint64_t a, uint64_t b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline uint64_t min(uint64_t a, uint64_t b) -{ - return (a < b) ? a : b; -} - -/* NOTE: On 64bit Darwin the `size_t` is defined as `unsigned long int` and `uint64_t` is defined - * as `unsigned long long`. Both of the definitions are 64 bit unsigned integer, but the automatic - * substitution does not allow to automatically pick function defined for `uint64_t` as it is not - * exactly the same type definition. - * Work this around by adding a templated function enabled for `size_t` type which will be used - * when there is no explicit specialization of `min()`/`max()` above. */ - -template -ccl_device_inline typename std::enable_if_t, T> max(T a, T b) -{ - return (a > b) ? a : b; -} - -template -ccl_device_inline typename std::enable_if_t, T> min(T a, T b) -{ - return (a < b) ? a : b; -} - -ccl_device_inline float max(float a, float b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline float min(float a, float b) -{ - return (a < b) ? a : b; -} - -ccl_device_inline double max(double a, double b) -{ - return (a > b) ? a : b; -} - -ccl_device_inline double min(double a, double b) -{ - return (a < b) ? a : b; -} - -/* These 2 guys are templated for usage with registers data. - * - * NOTE: Since this is CPU-only functions it is ok to use references here. - * But for other devices we'll need to be careful about this. - */ - -template ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d) -{ - return min(min(a, b), min(c, d)); -} - -template ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d) -{ - return max(max(a, b), max(c, d)); -} -#endif /* __KERNEL_GPU__ */ - -ccl_device_inline float min4(float a, float b, float c, float d) -{ - return min(min(a, b), min(c, d)); -} - -ccl_device_inline float max4(float a, float b, float c, float d) -{ - return max(max(a, b), max(c, d)); -} - -#if !defined(__KERNEL_METAL__) && !defined(__KERNEL_ONEAPI__) -/* Int/Float conversion */ - -ccl_device_inline int as_int(uint i) -{ - union { - uint ui; - int i; - } u; - u.ui = i; - return u.i; -} - -ccl_device_inline uint as_uint(int i) -{ - union { - uint ui; - int i; - } u; - u.i = i; - return u.ui; -} - -ccl_device_inline uint as_uint(float f) -{ - union { - uint i; - float f; - } u; - u.f = f; - return u.i; -} - -# ifndef __HIP__ -ccl_device_inline int __float_as_int(float f) -{ - union { - int i; - float f; - } u; - u.f = f; - return u.i; -} - -ccl_device_inline float __int_as_float(int i) -{ - union { - int i; - float f; - } u; - u.i = i; - return u.f; -} - -ccl_device_inline uint __float_as_uint(float f) -{ - union { - uint i; - float f; - } u; - u.f = f; - return u.i; -} - -ccl_device_inline float __uint_as_float(uint i) -{ - union { - uint i; - float f; - } u; - u.i = i; - return u.f; -} -# endif - -ccl_device_inline int4 __float4_as_int4(float4 f) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_castps_si128(f.m128)); -# else - return make_int4( - __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w)); -# endif -} - -ccl_device_inline float4 __int4_as_float4(int4 i) -{ -# ifdef __KERNEL_SSE__ - return float4(_mm_castsi128_ps(i.m128)); -# else - return make_float4( - __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w)); -# endif -} -#endif /* !defined(__KERNEL_METAL__) */ - -#if defined(__KERNEL_METAL__) -ccl_device_forceinline bool isnan_safe(float f) -{ - return isnan(f); -} - -ccl_device_forceinline bool isfinite_safe(float f) -{ - return isfinite(f); -} -#else -template ccl_device_inline uint pointer_pack_to_uint_0(T *ptr) -{ - return ((uint64_t)ptr) & 0xFFFFFFFF; -} - -template ccl_device_inline uint pointer_pack_to_uint_1(T *ptr) -{ - return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF; -} - -template ccl_device_inline T *pointer_unpack_from_uint(const uint a, const uint b) -{ - return (T *)(((uint64_t)b << 32) | a); -} - -ccl_device_inline uint uint16_pack_to_uint(const uint a, const uint b) -{ - return (a << 16) | b; -} - -ccl_device_inline uint uint16_unpack_from_uint_0(const uint i) -{ - return i >> 16; -} - -ccl_device_inline uint uint16_unpack_from_uint_1(const uint i) -{ - return i & 0xFFFF; -} - -/* Versions of functions which are safe for fast math. */ -ccl_device_inline bool isnan_safe(float f) -{ - unsigned int x = __float_as_uint(f); - return (x << 1) > 0xff000000u; -} - -ccl_device_inline bool isfinite_safe(float f) -{ - /* By IEEE 754 rule, 2*Inf equals Inf */ - unsigned int x = __float_as_uint(f); - return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u); -} -#endif - -ccl_device_inline float ensure_finite(float v) -{ - return isfinite_safe(v) ? v : 0.0f; -} - -#if !defined(__KERNEL_METAL__) -ccl_device_inline int clamp(int a, int mn, int mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float clamp(float a, float mn, float mx) -{ - return min(max(a, mn), mx); -} - -ccl_device_inline float mix(float a, float b, float t) -{ - return a + t * (b - a); -} - -ccl_device_inline float smoothstep(float edge0, float edge1, float x) -{ - float result; - if (x < edge0) { - result = 0.0f; - } - else if (x >= edge1) { - result = 1.0f; - } - else { - float t = (x - edge0) / (edge1 - edge0); - result = (3.0f - 2.0f * t) * (t * t); - } - return result; -} - -#endif /* !defined(__KERNEL_METAL__) */ - -#if defined(__KERNEL_CUDA__) -ccl_device_inline float saturatef(float a) -{ - return __saturatef(a); -} -#elif !defined(__KERNEL_METAL__) -ccl_device_inline float saturatef(float a) -{ - return clamp(a, 0.0f, 1.0f); -} -#endif /* __KERNEL_CUDA__ */ - -ccl_device_inline int float_to_int(float f) -{ - return (int)f; -} - -ccl_device_inline int floor_to_int(float f) -{ - return float_to_int(floorf(f)); -} - -ccl_device_inline float floorfrac(float x, ccl_private int *i) -{ - float f = floorf(x); - *i = float_to_int(f); - return x - f; -} - -ccl_device_inline int ceil_to_int(float f) -{ - return float_to_int(ceilf(f)); -} - -ccl_device_inline float fractf(float x) -{ - return x - floorf(x); -} - -/* Adapted from `godot-engine` math_funcs.h. */ -ccl_device_inline float wrapf(float value, float max, float min) -{ - float range = max - min; - return (range != 0.0f) ? value - (range * floorf((value - min) / range)) : min; -} - -ccl_device_inline float pingpongf(float a, float b) -{ - return (b != 0.0f) ? fabsf(fractf((a - b) / (b * 2.0f)) * b * 2.0f - b) : 0.0f; -} - -ccl_device_inline float smoothminf(float a, float b, float k) -{ - if (k != 0.0f) { - float h = fmaxf(k - fabsf(a - b), 0.0f) / k; - return fminf(a, b) - h * h * h * k * (1.0f / 6.0f); - } - else { - return fminf(a, b); - } -} - -ccl_device_inline float signf(float f) -{ - return (f < 0.0f) ? -1.0f : 1.0f; -} - -ccl_device_inline float nonzerof(float f, float eps) -{ - if (fabsf(f) < eps) { - return signf(f) * eps; - } - else { - return f; - } -} - -/* The behavior of `atan2(0, 0)` is undefined on many platforms, to ensure consistent behavior, we - * return 0 in this case. See !126951. - * Computes the angle between the positive x axis and the vector pointing from origin to (x, y). */ -ccl_device_inline float compatible_atan2(const float y, const float x) -{ - return (x == 0.0f && y == 0.0f) ? 0.0f : atan2f(y, x); -} - -/* `signum` function testing for zero. Matches GLSL and OSL functions. */ -ccl_device_inline float compatible_signf(float f) -{ - if (f == 0.0f) { - return 0.0f; - } - else { - return signf(f); - } -} - -ccl_device_inline float smoothstepf(float f) -{ - if (f <= 0.0f) { - return 0.0f; - } - if (f >= 1.0f) { - return 1.0f; - } - float ff = f * f; - return (3.0f * ff - 2.0f * ff * f); -} - -ccl_device_inline int mod(int x, int m) -{ - return (x % m + m) % m; -} - -ccl_device_inline float inverse_lerp(float a, float b, float x) -{ - return (x - a) / (b - a); -} - -/* Cubic interpolation between b and c, a and d are the previous and next point. */ -ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x) -{ - return 0.5f * - (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x + - (c - a)) * - x + - b; -} - -CCL_NAMESPACE_END - -#include "util/math_int2.h" -#include "util/math_int3.h" -#include "util/math_int4.h" -#include "util/math_int8.h" - -#include "util/math_float2.h" -#include "util/math_float4.h" -#include "util/math_float8.h" - -#include "util/math_float3.h" - -#include "util/rect.h" - -CCL_NAMESPACE_BEGIN - -/* Triangle */ - -ccl_device_inline float triangle_area(ccl_private const float3 &v1, - ccl_private const float3 &v2, - ccl_private const float3 &v3) -{ - return len(cross(v3 - v2, v1 - v2)) * 0.5f; -} - -/* Orthonormal vectors */ - -ccl_device_inline void make_orthonormals(const float3 N, - ccl_private float3 *a, - ccl_private float3 *b) -{ -#if 0 - if (fabsf(N.y) >= 0.999f) { - *a = make_float3(1, 0, 0); - *b = make_float3(0, 0, 1); - return; - } - if (fabsf(N.z) >= 0.999f) { - *a = make_float3(1, 0, 0); - *b = make_float3(0, 1, 0); - return; - } -#endif - - if (N.x != N.y || N.x != N.z) - *a = make_float3(N.z - N.y, N.x - N.z, N.y - N.x); //(1,1,1)x N - else - *a = make_float3(N.z - N.y, N.x + N.z, -N.y - N.x); //(-1,1,1)x N - - *a = normalize(*a); - *b = cross(N, *a); -} - -/* Color division */ - -ccl_device_inline Spectrum safe_invert_color(Spectrum a) -{ - FOREACH_SPECTRUM_CHANNEL (i) { - GET_SPECTRUM_CHANNEL(a, i) = (GET_SPECTRUM_CHANNEL(a, i) != 0.0f) ? - 1.0f / GET_SPECTRUM_CHANNEL(a, i) : - 0.0f; - } - - return a; -} - -/* Returns `a/b`, and replace the channel value with `fallback` if `b == 0`. */ -ccl_device_inline Spectrum safe_divide_color(Spectrum a, Spectrum b, const float fallback = 0.0f) -{ - FOREACH_SPECTRUM_CHANNEL (i) { - GET_SPECTRUM_CHANNEL(a, i) = (GET_SPECTRUM_CHANNEL(b, i) != 0.0f) ? - GET_SPECTRUM_CHANNEL(a, i) / GET_SPECTRUM_CHANNEL(b, i) : - fallback; - } - - return a; -} - -ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b) -{ - float x, y, z; - - x = (b.x != 0.0f) ? a.x / b.x : 0.0f; - y = (b.y != 0.0f) ? a.y / b.y : 0.0f; - z = (b.z != 0.0f) ? a.z / b.z : 0.0f; - - /* try to get gray even if b is zero */ - if (b.x == 0.0f) { - if (b.y == 0.0f) { - x = z; - y = z; - } - else if (b.z == 0.0f) { - x = y; - z = y; - } - else { - x = 0.5f * (y + z); - } - } - else if (b.y == 0.0f) { - if (b.z == 0.0f) { - y = x; - z = x; - } - else { - y = 0.5f * (x + z); - } - } - else if (b.z == 0.0f) { - z = 0.5f * (x + y); - } - - return make_float3(x, y, z); -} - -/* Rotation of point around axis and angle */ - -ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) -{ - float costheta = cosf(angle); - float sintheta = sinf(angle); - float3 r; - - r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + - (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + - (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); - - r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + - ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + - (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); - - r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + - (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + - ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); - - return r; -} - -/* NaN-safe math ops */ - -ccl_device_inline float safe_sqrtf(float f) -{ - return sqrtf(max(f, 0.0f)); -} - -ccl_device_inline float inversesqrtf(float f) -{ -#if defined(__KERNEL_METAL__) - return (f > 0.0f) ? rsqrt(f) : 0.0f; -#else - return (f > 0.0f) ? 1.0f / sqrtf(f) : 0.0f; -#endif -} - -ccl_device float safe_asinf(float a) -{ - return asinf(clamp(a, -1.0f, 1.0f)); -} - -ccl_device float safe_acosf(float a) -{ - return acosf(clamp(a, -1.0f, 1.0f)); -} - -ccl_device float compatible_powf(float x, float y) -{ -#ifdef __KERNEL_GPU__ - if (y == 0.0f) /* x^0 -> 1, including 0^0 */ - return 1.0f; - - /* GPU pow doesn't accept negative x, do manual checks here */ - if (x < 0.0f) { - if (fmodf(-y, 2.0f) == 0.0f) - return powf(-x, y); - else - return -powf(-x, y); - } - else if (x == 0.0f) - return 0.0f; -#endif - return powf(x, y); -} - -ccl_device float safe_powf(float a, float b) -{ - if (UNLIKELY(a < 0.0f && b != float_to_int(b))) { - return 0.0f; - } - - return compatible_powf(a, b); -} - -ccl_device float safe_divide(float a, float b) -{ - return (b != 0.0f) ? a / b : 0.0f; -} - -ccl_device float safe_logf(float a, float b) -{ - if (UNLIKELY(a <= 0.0f || b <= 0.0f)) { - return 0.0f; - } - - return safe_divide(logf(a), logf(b)); -} - -ccl_device float safe_modulo(float a, float b) -{ - return (b != 0.0f) ? fmodf(a, b) : 0.0f; -} - -ccl_device float safe_floored_modulo(float a, float b) -{ - return (b != 0.0f) ? a - floorf(a / b) * b : 0.0f; -} - -ccl_device_inline float sqr(float a) -{ - return a * a; -} - -ccl_device_inline float sin_from_cos(const float c) -{ - return safe_sqrtf(1.0f - sqr(c)); -} - -ccl_device_inline float cos_from_sin(const float s) -{ - return safe_sqrtf(1.0f - sqr(s)); -} - -ccl_device_inline float sin_sqr_to_one_minus_cos(const float s_sq) -{ - /* Using second-order Taylor expansion at small angles for better accuracy. */ - return s_sq > 0.0004f ? 1.0f - safe_sqrtf(1.0f - s_sq) : 0.5f * s_sq; -} - -ccl_device_inline float one_minus_cos(const float angle) -{ - /* Using second-order Taylor expansion at small angles for better accuracy. */ - return angle > 0.02f ? 1.0f - cosf(angle) : 0.5f * sqr(angle); -} - -ccl_device_inline float pow20(float a) -{ - return sqr(sqr(sqr(sqr(a)) * a)); -} - -ccl_device_inline float pow22(float a) -{ - return sqr(a * sqr(sqr(sqr(a)) * a)); -} - -#ifdef __KERNEL_METAL__ -ccl_device_inline float lgammaf(float x) -{ - /* Nemes, Gergő (2010), "New asymptotic expansion for the Gamma function", Archiv der Mathematik - */ - const float _1_180 = 1.0f / 180.0f; - const float log2pi = 1.83787706641f; - const float logx = log(x); - return (log2pi - logx + - x * (logx * 2.0f + log(x * sinh(1.0f / x) + (_1_180 / pow(x, 6.0f))) - 2.0f)) * - 0.5f; -} -#endif - -ccl_device_inline float beta(float x, float y) -{ - return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y)); -} - -ccl_device_inline float xor_signmask(float x, int y) -{ - return __int_as_float(__float_as_int(x) ^ y); -} - -ccl_device float bits_to_01(uint bits) -{ - return bits * (1.0f / (float)0xFFFFFFFF); -} - -#if !defined(__KERNEL_GPU__) -# if defined(__GNUC__) -ccl_device_inline uint popcount(uint x) -{ - return __builtin_popcount(x); -} -# else -ccl_device_inline uint popcount(uint x) -{ - /* TODO(Stefan): pop-count intrinsic for Windows with fallback for older CPUs. */ - uint i = x; - i = i - ((i >> 1) & 0x55555555); - i = (i & 0x33333333) + ((i >> 2) & 0x33333333); - i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; - return i; -} -# endif -#elif defined(__KERNEL_ONEAPI__) -# define popcount(x) sycl::popcount(x) -#elif defined(__KERNEL_HIP__) -/* Use popcll to support 64-bit wave for pre-RDNA AMD GPUs */ -# define popcount(x) __popcll(x) -#elif !defined(__KERNEL_METAL__) -# define popcount(x) __popc(x) -#endif - -ccl_device_inline uint count_leading_zeros(uint x) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) - return __clz(x); -#elif defined(__KERNEL_METAL__) - return clz(x); -#elif defined(__KERNEL_ONEAPI__) - return sycl::clz(x); -#else - assert(x != 0); -# ifdef _MSC_VER - unsigned long leading_zero = 0; - _BitScanReverse(&leading_zero, x); - return (31 - leading_zero); -# else - return __builtin_clz(x); -# endif -#endif -} - -ccl_device_inline uint count_trailing_zeros(uint x) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) - return (__ffs(x) - 1); -#elif defined(__KERNEL_METAL__) - return ctz(x); -#elif defined(__KERNEL_ONEAPI__) - return sycl::ctz(x); -#else - assert(x != 0); -# ifdef _MSC_VER - unsigned long ctz = 0; - _BitScanForward(&ctz, x); - return ctz; -# else - return __builtin_ctz(x); -# endif -#endif -} - -ccl_device_inline uint find_first_set(uint x) -{ -#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) - return __ffs(x); -#elif defined(__KERNEL_METAL__) - return (x != 0) ? ctz(x) + 1 : 0; -#else -# ifdef _MSC_VER - return (x != 0) ? (32 - count_leading_zeros(x & (~x + 1))) : 0; -# else - return __builtin_ffs(x); -# endif -#endif -} - -/* projections */ -ccl_device_inline float2 map_to_tube(const float3 co) -{ - float len, u, v; - len = sqrtf(co.x * co.x + co.y * co.y); - if (len > 0.0f) { - u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f; - v = (co.z + 1.0f) * 0.5f; - } - else { - u = v = 0.0f; - } - return make_float2(u, v); -} - -ccl_device_inline float2 map_to_sphere(const float3 co) -{ - float l = dot(co, co); - float u, v; - if (l > 0.0f) { - if (UNLIKELY(co.x == 0.0f && co.y == 0.0f)) { - u = 0.0f; /* Otherwise domain error. */ - } - else { - u = (0.5f - atan2f(co.x, co.y) * M_1_2PI_F); - } - v = 1.0f - safe_acosf(co.z / sqrtf(l)) * M_1_PI_F; - } - else { - u = v = 0.0f; - } - return make_float2(u, v); -} - -/* Compares two floats. - * Returns true if their absolute difference is smaller than abs_diff (for numbers near zero) - * or their relative difference is less than ulp_diff ULPs. - * Based on - * https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ - */ - -ccl_device_inline bool compare_floats(float a, float b, float abs_diff, int ulp_diff) -{ - if (fabsf(a - b) < abs_diff) { - return true; - } - - if ((a < 0.0f) != (b < 0.0f)) { - return false; - } - - return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff); -} - -/* Calculate the angle between the two vectors a and b. - * The usual approach `acos(dot(a, b))` has severe precision issues for small angles, - * which are avoided by this method. - * Based on "Mangled Angles" from https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf - */ -ccl_device_inline float precise_angle(float3 a, float3 b) -{ - return 2.0f * atan2f(len(a - b), len(a + b)); -} - -/* Tangent of the angle between vectors a and b. */ -ccl_device_inline float tan_angle(float3 a, float3 b) -{ - return len(cross(a, b)) / dot(a, b); -} - -/* Return value which is greater than the given one and is a power of two. */ -ccl_device_inline uint next_power_of_two(uint x) -{ - return x == 0 ? 1 : 1 << (32 - count_leading_zeros(x)); -} - -/* Return value which is lower than the given one and is a power of two. */ -ccl_device_inline uint prev_power_of_two(uint x) -{ - return x < 2 ? x : 1 << (31 - count_leading_zeros(x - 1)); -} - -#ifndef __has_builtin -# define __has_builtin(v) 0 -#endif - -/* Reverses the bits of a 32 bit integer. */ -ccl_device_inline uint32_t reverse_integer_bits(uint32_t x) -{ - /* Use a native instruction if it exists. */ -#if defined(__KERNEL_CUDA__) - return __brev(x); -#elif defined(__KERNEL_METAL__) - return reverse_bits(x); -#elif defined(__aarch64__) || (defined(_M_ARM64) && !defined(_MSC_VER)) - /* Assume the rbit is always available on 64bit ARM architecture. */ - __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x)); - return x; -#elif defined(__arm__) && ((__ARM_ARCH > 7) || __ARM_ARCH == 6 && __ARM_ARCH_ISA_THUMB >= 2) - /* This ARM instruction is available in ARMv6T2 and above. - * This 32-bit Thumb instruction is available in ARMv6T2 and above. */ - __asm__("rbit %0, %1" : "=r"(x) : "r"(x)); - return x; -#elif __has_builtin(__builtin_bitreverse32) - return __builtin_bitreverse32(x); -#else - /* Flip pairwise. */ - x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); - /* Flip pairs. */ - x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); - /* Flip nibbles. */ - x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); - /* Flip bytes. CPUs have an instruction for that, pretty fast one. */ -# ifdef _MSC_VER - return _byteswap_ulong(x); -# elif defined(__INTEL_COMPILER) - return (uint32_t)_bswap((int)x); -# else - /* Assuming gcc or clang. */ - return __builtin_bswap32(x); -# endif -#endif -} - -/* Solve quadratic equation a*x^2 + b*x + c = 0, adapted from Mitsuba 3 - * The solution is ordered so that x1 <= x2. - * Returns true if at least one solution is found. */ -ccl_device_inline bool solve_quadratic( - const float a, const float b, const float c, ccl_private float &x1, ccl_private float &x2) -{ - /* If the equation is linear, the solution is -c/b, but b has to be non-zero. */ - const bool valid_linear = (a == 0.0f) && (b != 0.0f); - x1 = x2 = -c / b; - - const float discriminant = sqr(b) - 4.0f * a * c; - /* Allow slightly negative discriminant in case of numerical precision issues. */ - const bool valid_quadratic = (a != 0.0f) && (discriminant > -1e-5f); - - if (valid_quadratic) { - /* Numerically stable version of (-b ± sqrt(discriminant)) / (2 * a), avoiding catastrophic - * cancellation when `b` is very close to `sqrt(discriminant)`, by finding the solution of - * greater magnitude which does not suffer from loss of precision, then using the identity - * x1 * x2 = c / a. */ - const float temp = -0.5f * (b + copysignf(safe_sqrtf(discriminant), b)); - const float r1 = temp / a; - const float r2 = c / temp; - - x1 = fminf(r1, r2); - x2 = fmaxf(r1, r2); - } - - return (valid_linear || valid_quadratic); -} - -/* Defines a closed interval [min, max]. */ -template struct Interval { - T min; - T max; - - ccl_device_inline_method bool is_empty() const - { - return min >= max; - } - - ccl_device_inline_method bool contains(T value) const - { - return value >= min && value <= max; - } - - ccl_device_inline_method T length() const - { - return max - min; - } -}; - -/* Computes the intersection of two intervals. */ -template -ccl_device_inline Interval intervals_intersection(ccl_private const Interval &first, - ccl_private const Interval &second) -{ - return {max(first.min, second.min), min(first.max, second.max)}; -} - -CCL_NAMESPACE_END - -#endif /* __UTIL_MATH_H__ */ +#include "util/rect.h" // IWYU pragma: export diff --git a/intern/cycles/util/math_base.h b/intern/cycles/util/math_base.h new file mode 100644 index 00000000000..efa6cf049b7 --- /dev/null +++ b/intern/cycles/util/math_base.h @@ -0,0 +1,868 @@ +/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#pragma once + +/* Math + * + * Basic math functions on scalar and vector types. This header is used by + * both the kernel code when compiled as C++, and other C++ non-kernel code. */ + +#include "util/defines.h" +#include "util/types_base.h" + +#ifdef __HIP__ +# include +#endif + +#if !defined(__KERNEL_METAL__) +# include // IWYU pragma: export +# include // IWYU pragma: export +#endif + +CCL_NAMESPACE_BEGIN + +/* Float Pi variations */ + +/* Division */ +#ifndef M_PI_F +# define M_PI_F (3.1415926535897932f) /* pi */ +#endif +#ifndef M_PI_2_F +# define M_PI_2_F (1.5707963267948966f) /* pi/2 */ +#endif +#ifndef M_PI_4_F +# define M_PI_4_F (0.7853981633974830f) /* pi/4 */ +#endif +#ifndef M_1_PI_F +# define M_1_PI_F (0.3183098861837067f) /* 1/pi */ +#endif +#ifndef M_2_PI_F +# define M_2_PI_F (0.6366197723675813f) /* 2/pi */ +#endif +#ifndef M_1_2PI_F +# define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */ +#endif +#ifndef M_1_4PI_F +# define M_1_4PI_F (0.0795774715459476f) /* 1/(4*pi) */ +#endif +#ifndef M_SQRT_PI_8_F +# define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */ +#endif +#ifndef M_LN_2PI_F +# define M_LN_2PI_F (1.8378770664093454f) /* ln(2*pi) */ +#endif + +/* Multiplication */ +#ifndef M_2PI_F +# define M_2PI_F (6.2831853071795864f) /* 2*pi */ +#endif +#ifndef M_4PI_F +# define M_4PI_F (12.566370614359172f) /* 4*pi */ +#endif +#ifndef M_PI_4F +# define M_PI_4F 0.78539816339744830962f /* pi/4 */ +#endif + +/* Float sqrt variations */ +#ifndef M_SQRT2_F +# define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */ +#endif +#ifndef M_CBRT2_F +# define M_CBRT2_F 1.2599210498948732f /* cbrt(2) */ +#endif +#ifndef M_SQRT1_2F +# define M_SQRT1_2F 0.70710678118654752440f /* sqrt(1/2) */ +#endif +#ifndef M_SQRT3_F +# define M_SQRT3_F (1.7320508075688772f) /* sqrt(3) */ +#endif +#ifndef M_LN2_F +# define M_LN2_F (0.6931471805599453f) /* ln(2) */ +#endif +#ifndef M_LN10_F +# define M_LN10_F (2.3025850929940457f) /* ln(10) */ +#endif + +/* Scalar */ + +#if !defined(__HIP__) && !defined(__KERNEL_ONEAPI__) +# ifdef _WIN32 +ccl_device_inline float fmaxf(float a, float b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline float fminf(float a, float b) +{ + return (a < b) ? a : b; +} + +# endif /* _WIN32 */ +#endif /* __HIP__, __KERNEL_ONEAPI__ */ + +#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) +# ifndef __KERNEL_ONEAPI__ +using std::isfinite; +using std::isnan; +using std::sqrt; +# else +# define isfinite(x) sycl::isfinite((x)) +# define isnan(x) sycl::isnan((x)) +# endif + +ccl_device_inline int abs(int x) +{ + return (x > 0) ? x : -x; +} + +ccl_device_inline int max(int a, int b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline int min(int a, int b) +{ + return (a < b) ? a : b; +} + +ccl_device_inline uint32_t max(uint32_t a, uint32_t b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline uint32_t min(uint32_t a, uint32_t b) +{ + return (a < b) ? a : b; +} + +ccl_device_inline uint64_t max(uint64_t a, uint64_t b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline uint64_t min(uint64_t a, uint64_t b) +{ + return (a < b) ? a : b; +} + +/* NOTE: On 64bit Darwin the `size_t` is defined as `unsigned long int` and `uint64_t` is defined + * as `unsigned long long`. Both of the definitions are 64 bit unsigned integer, but the automatic + * substitution does not allow to automatically pick function defined for `uint64_t` as it is not + * exactly the same type definition. + * Work this around by adding a templated function enabled for `size_t` type which will be used + * when there is no explicit specialization of `min()`/`max()` above. */ + +template +ccl_device_inline typename std::enable_if_t, T> max(T a, T b) +{ + return (a > b) ? a : b; +} + +template +ccl_device_inline typename std::enable_if_t, T> min(T a, T b) +{ + return (a < b) ? a : b; +} + +ccl_device_inline float max(float a, float b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline float min(float a, float b) +{ + return (a < b) ? a : b; +} + +ccl_device_inline double max(double a, double b) +{ + return (a > b) ? a : b; +} + +ccl_device_inline double min(double a, double b) +{ + return (a < b) ? a : b; +} + +/* These 2 guys are templated for usage with registers data. + * + * NOTE: Since this is CPU-only functions it is ok to use references here. + * But for other devices we'll need to be careful about this. + */ + +template ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d) +{ + return min(min(a, b), min(c, d)); +} + +template ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d) +{ + return max(max(a, b), max(c, d)); +} +#endif /* __KERNEL_GPU__ */ + +ccl_device_inline float min4(float a, float b, float c, float d) +{ + return min(min(a, b), min(c, d)); +} + +ccl_device_inline float max4(float a, float b, float c, float d) +{ + return max(max(a, b), max(c, d)); +} + +#if !defined(__KERNEL_METAL__) && !defined(__KERNEL_ONEAPI__) +/* Int/Float conversion */ + +ccl_device_inline int as_int(uint i) +{ + union { + uint ui; + int i; + } u; + u.ui = i; + return u.i; +} + +ccl_device_inline uint as_uint(int i) +{ + union { + uint ui; + int i; + } u; + u.i = i; + return u.ui; +} + +ccl_device_inline uint as_uint(float f) +{ + union { + uint i; + float f; + } u; + u.f = f; + return u.i; +} + +# ifndef __HIP__ +ccl_device_inline int __float_as_int(float f) +{ + union { + int i; + float f; + } u; + u.f = f; + return u.i; +} + +ccl_device_inline float __int_as_float(int i) +{ + union { + int i; + float f; + } u; + u.i = i; + return u.f; +} + +ccl_device_inline uint __float_as_uint(float f) +{ + union { + uint i; + float f; + } u; + u.f = f; + return u.i; +} + +ccl_device_inline float __uint_as_float(uint i) +{ + union { + uint i; + float f; + } u; + u.i = i; + return u.f; +} +# endif + +#endif /* !defined(__KERNEL_METAL__) */ + +#if defined(__KERNEL_METAL__) +ccl_device_forceinline bool isnan_safe(float f) +{ + return isnan(f); +} + +ccl_device_forceinline bool isfinite_safe(float f) +{ + return isfinite(f); +} +#else +template ccl_device_inline uint pointer_pack_to_uint_0(T *ptr) +{ + return ((uint64_t)ptr) & 0xFFFFFFFF; +} + +template ccl_device_inline uint pointer_pack_to_uint_1(T *ptr) +{ + return (((uint64_t)ptr) >> 32) & 0xFFFFFFFF; +} + +template ccl_device_inline T *pointer_unpack_from_uint(const uint a, const uint b) +{ + return (T *)(((uint64_t)b << 32) | a); +} + +ccl_device_inline uint uint16_pack_to_uint(const uint a, const uint b) +{ + return (a << 16) | b; +} + +ccl_device_inline uint uint16_unpack_from_uint_0(const uint i) +{ + return i >> 16; +} + +ccl_device_inline uint uint16_unpack_from_uint_1(const uint i) +{ + return i & 0xFFFF; +} + +/* Versions of functions which are safe for fast math. */ +ccl_device_inline bool isnan_safe(float f) +{ + unsigned int x = __float_as_uint(f); + return (x << 1) > 0xff000000u; +} + +ccl_device_inline bool isfinite_safe(float f) +{ + /* By IEEE 754 rule, 2*Inf equals Inf */ + unsigned int x = __float_as_uint(f); + return (f == f) && (x == 0 || x == (1u << 31) || (f != 2.0f * f)) && !((x << 1) > 0xff000000u); +} +#endif + +ccl_device_inline float ensure_finite(float v) +{ + return isfinite_safe(v) ? v : 0.0f; +} + +#if !defined(__KERNEL_METAL__) +ccl_device_inline int clamp(int a, int mn, int mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float clamp(float a, float mn, float mx) +{ + return min(max(a, mn), mx); +} + +ccl_device_inline float mix(float a, float b, float t) +{ + return a + t * (b - a); +} + +ccl_device_inline float smoothstep(float edge0, float edge1, float x) +{ + float result; + if (x < edge0) { + result = 0.0f; + } + else if (x >= edge1) { + result = 1.0f; + } + else { + float t = (x - edge0) / (edge1 - edge0); + result = (3.0f - 2.0f * t) * (t * t); + } + return result; +} + +#endif /* !defined(__KERNEL_METAL__) */ + +#if defined(__KERNEL_CUDA__) +ccl_device_inline float saturatef(float a) +{ + return __saturatef(a); +} +#elif !defined(__KERNEL_METAL__) +ccl_device_inline float saturatef(float a) +{ + return clamp(a, 0.0f, 1.0f); +} +#endif /* __KERNEL_CUDA__ */ + +ccl_device_inline int float_to_int(float f) +{ + return (int)f; +} + +ccl_device_inline int floor_to_int(float f) +{ + return float_to_int(floorf(f)); +} + +ccl_device_inline float floorfrac(float x, ccl_private int *i) +{ + float f = floorf(x); + *i = float_to_int(f); + return x - f; +} + +ccl_device_inline int ceil_to_int(float f) +{ + return float_to_int(ceilf(f)); +} + +ccl_device_inline float fractf(float x) +{ + return x - floorf(x); +} + +/* Adapted from `godot-engine` math_funcs.h. */ +ccl_device_inline float wrapf(float value, float max, float min) +{ + float range = max - min; + return (range != 0.0f) ? value - (range * floorf((value - min) / range)) : min; +} + +ccl_device_inline float pingpongf(float a, float b) +{ + return (b != 0.0f) ? fabsf(fractf((a - b) / (b * 2.0f)) * b * 2.0f - b) : 0.0f; +} + +ccl_device_inline float smoothminf(float a, float b, float k) +{ + if (k != 0.0f) { + float h = fmaxf(k - fabsf(a - b), 0.0f) / k; + return fminf(a, b) - h * h * h * k * (1.0f / 6.0f); + } + return fminf(a, b); +} + +ccl_device_inline float signf(float f) +{ + return (f < 0.0f) ? -1.0f : 1.0f; +} + +ccl_device_inline float nonzerof(float f, float eps) +{ + if (fabsf(f) < eps) { + return signf(f) * eps; + } + return f; +} + +/* The behavior of `atan2(0, 0)` is undefined on many platforms, to ensure consistent behavior, we + * return 0 in this case. See !126951. + * Computes the angle between the positive x axis and the vector pointing from origin to (x, y). */ +ccl_device_inline float compatible_atan2(const float y, const float x) +{ + return (x == 0.0f && y == 0.0f) ? 0.0f : atan2f(y, x); +} + +/* `signum` function testing for zero. Matches GLSL and OSL functions. */ +ccl_device_inline float compatible_signf(float f) +{ + if (f == 0.0f) { + return 0.0f; + } + return signf(f); +} + +ccl_device_inline float smoothstepf(float f) +{ + if (f <= 0.0f) { + return 0.0f; + } + if (f >= 1.0f) { + return 1.0f; + } + float ff = f * f; + return (3.0f * ff - 2.0f * ff * f); +} + +ccl_device_inline int mod(int x, int m) +{ + return (x % m + m) % m; +} + +ccl_device_inline float inverse_lerp(float a, float b, float x) +{ + return (x - a) / (b - a); +} + +/* Cubic interpolation between b and c, a and d are the previous and next point. */ +ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x) +{ + return 0.5f * + (((d + 3.0f * (b - c) - a) * x + (2.0f * a - 5.0f * b + 4.0f * c - d)) * x + + (c - a)) * + x + + b; +} + +/* NaN-safe math ops */ + +ccl_device_inline float safe_sqrtf(float f) +{ + return sqrtf(max(f, 0.0f)); +} + +ccl_device_inline float inversesqrtf(float f) +{ +#if defined(__KERNEL_METAL__) + return (f > 0.0f) ? rsqrt(f) : 0.0f; +#else + return (f > 0.0f) ? 1.0f / sqrtf(f) : 0.0f; +#endif +} + +ccl_device float safe_asinf(float a) +{ + return asinf(clamp(a, -1.0f, 1.0f)); +} + +ccl_device float safe_acosf(float a) +{ + return acosf(clamp(a, -1.0f, 1.0f)); +} + +ccl_device float compatible_powf(float x, float y) +{ +#ifdef __KERNEL_GPU__ + if (y == 0.0f) /* x^0 -> 1, including 0^0 */ + return 1.0f; + + /* GPU pow doesn't accept negative x, do manual checks here */ + if (x < 0.0f) { + if (fmodf(-y, 2.0f) == 0.0f) + return powf(-x, y); + else + return -powf(-x, y); + } + else if (x == 0.0f) + return 0.0f; +#endif + return powf(x, y); +} + +ccl_device float safe_powf(float a, float b) +{ + if (UNLIKELY(a < 0.0f && b != float_to_int(b))) { + return 0.0f; + } + + return compatible_powf(a, b); +} + +ccl_device float safe_divide(float a, float b) +{ + return (b != 0.0f) ? a / b : 0.0f; +} + +ccl_device float safe_logf(float a, float b) +{ + if (UNLIKELY(a <= 0.0f || b <= 0.0f)) { + return 0.0f; + } + + return safe_divide(logf(a), logf(b)); +} + +ccl_device float safe_modulo(float a, float b) +{ + return (b != 0.0f) ? fmodf(a, b) : 0.0f; +} + +ccl_device float safe_floored_modulo(float a, float b) +{ + return (b != 0.0f) ? a - floorf(a / b) * b : 0.0f; +} + +ccl_device_inline float sqr(float a) +{ + return a * a; +} + +ccl_device_inline float sin_from_cos(const float c) +{ + return safe_sqrtf(1.0f - sqr(c)); +} + +ccl_device_inline float cos_from_sin(const float s) +{ + return safe_sqrtf(1.0f - sqr(s)); +} + +ccl_device_inline float sin_sqr_to_one_minus_cos(const float s_sq) +{ + /* Using second-order Taylor expansion at small angles for better accuracy. */ + return s_sq > 0.0004f ? 1.0f - safe_sqrtf(1.0f - s_sq) : 0.5f * s_sq; +} + +ccl_device_inline float one_minus_cos(const float angle) +{ + /* Using second-order Taylor expansion at small angles for better accuracy. */ + return angle > 0.02f ? 1.0f - cosf(angle) : 0.5f * sqr(angle); +} + +ccl_device_inline float pow20(float a) +{ + return sqr(sqr(sqr(sqr(a)) * a)); +} + +ccl_device_inline float pow22(float a) +{ + return sqr(a * sqr(sqr(sqr(a)) * a)); +} + +#ifdef __KERNEL_METAL__ +ccl_device_inline float lgammaf(float x) +{ + /* Nemes, Gergő (2010), "New asymptotic expansion for the Gamma function", Archiv der Mathematik + */ + const float _1_180 = 1.0f / 180.0f; + const float log2pi = 1.83787706641f; + const float logx = log(x); + return (log2pi - logx + + x * (logx * 2.0f + log(x * sinh(1.0f / x) + (_1_180 / pow(x, 6.0f))) - 2.0f)) * + 0.5f; +} +#endif + +ccl_device_inline float beta(float x, float y) +{ + return expf(lgammaf(x) + lgammaf(y) - lgammaf(x + y)); +} + +ccl_device_inline float xor_signmask(float x, int y) +{ + return __int_as_float(__float_as_int(x) ^ y); +} + +ccl_device float bits_to_01(uint bits) +{ + return bits * (1.0f / (float)0xFFFFFFFF); +} + +#if !defined(__KERNEL_GPU__) +# if defined(__GNUC__) +ccl_device_inline uint popcount(uint x) +{ + return __builtin_popcount(x); +} +# else +ccl_device_inline uint popcount(uint x) +{ + /* TODO(Stefan): pop-count intrinsic for Windows with fallback for older CPUs. */ + uint i = x; + i = i - ((i >> 1) & 0x55555555); + i = (i & 0x33333333) + ((i >> 2) & 0x33333333); + i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; + return i; +} +# endif +#elif defined(__KERNEL_ONEAPI__) +# define popcount(x) sycl::popcount(x) +#elif defined(__KERNEL_HIP__) +/* Use popcll to support 64-bit wave for pre-RDNA AMD GPUs */ +# define popcount(x) __popcll(x) +#elif !defined(__KERNEL_METAL__) +# define popcount(x) __popc(x) +#endif + +ccl_device_inline uint count_leading_zeros(uint x) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) + return __clz(x); +#elif defined(__KERNEL_METAL__) + return clz(x); +#elif defined(__KERNEL_ONEAPI__) + return sycl::clz(x); +#else + assert(x != 0); +# ifdef _MSC_VER + unsigned long leading_zero = 0; + _BitScanReverse(&leading_zero, x); + return (31 - leading_zero); +# else + return __builtin_clz(x); +# endif +#endif +} + +ccl_device_inline uint count_trailing_zeros(uint x) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) + return (__ffs(x) - 1); +#elif defined(__KERNEL_METAL__) + return ctz(x); +#elif defined(__KERNEL_ONEAPI__) + return sycl::ctz(x); +#else + assert(x != 0); +# ifdef _MSC_VER + unsigned long ctz = 0; + _BitScanForward(&ctz, x); + return ctz; +# else + return __builtin_ctz(x); +# endif +#endif +} + +ccl_device_inline uint find_first_set(uint x) +{ +#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__) + return __ffs(x); +#elif defined(__KERNEL_METAL__) + return (x != 0) ? ctz(x) + 1 : 0; +#else +# ifdef _MSC_VER + return (x != 0) ? (32 - count_leading_zeros(x & (~x + 1))) : 0; +# else + return __builtin_ffs(x); +# endif +#endif +} + +/* Compares two floats. + * Returns true if their absolute difference is smaller than abs_diff (for numbers near zero) + * or their relative difference is less than ulp_diff ULPs. + * Based on + * https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ + */ + +ccl_device_inline bool compare_floats(float a, float b, float abs_diff, int ulp_diff) +{ + if (fabsf(a - b) < abs_diff) { + return true; + } + + if ((a < 0.0f) != (b < 0.0f)) { + return false; + } + + return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff); +} + +/* Return value which is greater than the given one and is a power of two. */ +ccl_device_inline uint next_power_of_two(uint x) +{ + return x == 0 ? 1 : 1 << (32 - count_leading_zeros(x)); +} + +/* Return value which is lower than the given one and is a power of two. */ +ccl_device_inline uint prev_power_of_two(uint x) +{ + return x < 2 ? x : 1 << (31 - count_leading_zeros(x - 1)); +} + +#ifndef __has_builtin +# define __has_builtin(v) 0 +#endif + +/* Reverses the bits of a 32 bit integer. */ +ccl_device_inline uint32_t reverse_integer_bits(uint32_t x) +{ + /* Use a native instruction if it exists. */ +#if defined(__KERNEL_CUDA__) + return __brev(x); +#elif defined(__KERNEL_METAL__) + return reverse_bits(x); +#elif defined(__aarch64__) || (defined(_M_ARM64) && !defined(_MSC_VER)) + /* Assume the rbit is always available on 64bit ARM architecture. */ + __asm__("rbit %w0, %w1" : "=r"(x) : "r"(x)); + return x; +#elif defined(__arm__) && ((__ARM_ARCH > 7) || __ARM_ARCH == 6 && __ARM_ARCH_ISA_THUMB >= 2) + /* This ARM instruction is available in ARMv6T2 and above. + * This 32-bit Thumb instruction is available in ARMv6T2 and above. */ + __asm__("rbit %0, %1" : "=r"(x) : "r"(x)); + return x; +#elif __has_builtin(__builtin_bitreverse32) + return __builtin_bitreverse32(x); +#else + /* Flip pairwise. */ + x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + /* Flip pairs. */ + x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + /* Flip nibbles. */ + x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + /* Flip bytes. CPUs have an instruction for that, pretty fast one. */ +# ifdef _MSC_VER + return _byteswap_ulong(x); +# elif defined(__INTEL_COMPILER) + return (uint32_t)_bswap((int)x); +# else + /* Assuming gcc or clang. */ + return __builtin_bswap32(x); +# endif +#endif +} + +/* Solve quadratic equation a*x^2 + b*x + c = 0, adapted from Mitsuba 3 + * The solution is ordered so that x1 <= x2. + * Returns true if at least one solution is found. */ +ccl_device_inline bool solve_quadratic( + const float a, const float b, const float c, ccl_private float &x1, ccl_private float &x2) +{ + /* If the equation is linear, the solution is -c/b, but b has to be non-zero. */ + const bool valid_linear = (a == 0.0f) && (b != 0.0f); + x1 = x2 = -c / b; + + const float discriminant = sqr(b) - 4.0f * a * c; + /* Allow slightly negative discriminant in case of numerical precision issues. */ + const bool valid_quadratic = (a != 0.0f) && (discriminant > -1e-5f); + + if (valid_quadratic) { + /* Numerically stable version of (-b ± sqrt(discriminant)) / (2 * a), avoiding catastrophic + * cancellation when `b` is very close to `sqrt(discriminant)`, by finding the solution of + * greater magnitude which does not suffer from loss of precision, then using the identity + * x1 * x2 = c / a. */ + const float temp = -0.5f * (b + copysignf(safe_sqrtf(discriminant), b)); + const float r1 = temp / a; + const float r2 = c / temp; + + x1 = fminf(r1, r2); + x2 = fmaxf(r1, r2); + } + + return (valid_linear || valid_quadratic); +} + +/* Defines a closed interval [min, max]. */ +template struct Interval { + T min; + T max; + + ccl_device_inline_method bool is_empty() const + { + return min >= max; + } + + ccl_device_inline_method bool contains(T value) const + { + return value >= min && value <= max; + } + + ccl_device_inline_method T length() const + { + return max - min; + } +}; + +/* Computes the intersection of two intervals. */ +template +ccl_device_inline Interval intervals_intersection(ccl_private const Interval &first, + ccl_private const Interval &second) +{ + return {max(first.min, second.min), min(first.max, second.max)}; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/util/math_cdf.cpp b/intern/cycles/util/math_cdf.cpp index 592d9ef717e..8b47528a608 100644 --- a/intern/cycles/util/math_cdf.cpp +++ b/intern/cycles/util/math_cdf.cpp @@ -4,8 +4,9 @@ #include "util/math_cdf.h" +#include + #include "util/algorithm.h" -#include "util/math.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/math_cdf.h b/intern/cycles/util/math_cdf.h index fef5570d8bd..9d0b1f5c861 100644 --- a/intern/cycles/util/math_cdf.h +++ b/intern/cycles/util/math_cdf.h @@ -4,8 +4,7 @@ #pragma once -#include "util/algorithm.h" -#include "util/math.h" +#include "util/math_base.h" #include "util/vector.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/math_fast.h b/intern/cycles/util/math_fast.h index 89095e587e4..7d94f877114 100644 --- a/intern/cycles/util/math_fast.h +++ b/intern/cycles/util/math_fast.h @@ -23,6 +23,13 @@ #pragma once +#include "util/math_base.h" +#include "util/math_float3.h" +#include "util/math_float4.h" +#include "util/math_int4.h" +#include "util/types_float3.h" +#include "util/types_float4.h" + CCL_NAMESPACE_BEGIN ccl_device_inline float madd(const float a, const float b, const float c) @@ -91,8 +98,9 @@ ccl_device float fast_sinf(float x) x = madd(qf, -1.2816720341285448015e-12f * 4, x); x = M_PI_2_F - (M_PI_2_F - x); /* Crush denormals */ float s = x * x; - if ((q & 1) != 0) + if ((q & 1) != 0) { x = -x; + } /* This polynomial approximation has very low error on [-pi/2,+pi/2] * 1.19209e-07 max error in total over [-2pi,+2pi]. */ float u = 2.6083159809786593541503e-06f; @@ -468,9 +476,7 @@ ccl_device_inline float fast_expm1f(float x) x = 1.0f - (1.0f - x); /* Crush denormals. */ return madd(0.5f, x * x, x); } - else { - return fast_expf(x) - 1.0f; - } + return fast_expf(x) - 1.0f; } ccl_device float fast_sinhf(float x) @@ -482,17 +488,15 @@ ccl_device float fast_sinhf(float x) float e = fast_expf(a); return copysignf(0.5f * e - 0.5f / e, x); } - else { - a = 1.0f - (1.0f - a); /* Crush denorms. */ - float a2 = a * a; - /* Degree 7 polynomial generated with sollya. */ - /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. */ - float r = 2.03945513931e-4f; - r = madd(r, a2, 8.32990277558e-3f); - r = madd(r, a2, 0.1666673421859f); - r = madd(r * a, a2, a); - return copysignf(r, x); - } + a = 1.0f - (1.0f - a); /* Crush denorms. */ + float a2 = a * a; + /* Degree 7 polynomial generated with sollya. */ + /* Examined 2130706434 values of sinh on [-1,1]: 1.19209e-07 max error. */ + float r = 2.03945513931e-4f; + r = madd(r, a2, 8.32990277558e-3f); + r = madd(r, a2, 0.1666673421859f); + r = madd(r * a, a2, a); + return copysignf(r, x); } ccl_device_inline float fast_coshf(float x) @@ -516,10 +520,12 @@ ccl_device_inline float fast_tanhf(float x) ccl_device float fast_safe_powf(float x, float y) { - if (y == 0) + if (y == 0) { return 1.0f; /* x^1=1 */ - if (x == 0) + } + if (x == 0) { return 0.0f; /* 0^y=0 */ + } float sign = 1.0f; if (x < 0.0f) { /* if x is negative, only deal with integer powers diff --git a/intern/cycles/util/math_float2.h b/intern/cycles/util/math_float2.h index 90fbfc20113..31f0ca57538 100644 --- a/intern/cycles/util/math_float2.h +++ b/intern/cycles/util/math_float2.h @@ -4,9 +4,9 @@ #pragma once -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/math_base.h" +#include "util/types_float2.h" +#include "util/types_float4.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h index f2b9fc248a3..1a1fb4df474 100644 --- a/intern/cycles/util/math_float3.h +++ b/intern/cycles/util/math_float3.h @@ -5,9 +5,10 @@ #pragma once -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/math_base.h" +#include "util/math_float4.h" +#include "util/types_float3.h" +#include "util/types_float4.h" CCL_NAMESPACE_BEGIN @@ -384,10 +385,10 @@ ccl_device_inline float3 reflect(const float3 incident, const float3 unit_normal ccl_device_inline float3 refract(const float3 incident, const float3 normal, const float eta) { float k = 1.0f - eta * eta * (1.0f - dot(normal, incident) * dot(normal, incident)); - if (k < 0.0f) + if (k < 0.0f) { return zero_float3(); - else - return eta * incident - (eta * dot(normal, incident) + sqrt(k)) * normal; + } + return eta * incident - (eta * dot(normal, incident) + sqrt(k)) * normal; } ccl_device_inline float3 faceforward(const float3 vector, @@ -498,13 +499,128 @@ ccl_device_inline bool isfinite_safe(float3 v) ccl_device_inline float3 ensure_finite(float3 v) { - if (!isfinite_safe(v.x)) + if (!isfinite_safe(v.x)) { v.x = 0.0f; - if (!isfinite_safe(v.y)) + } + if (!isfinite_safe(v.y)) { v.y = 0.0f; - if (!isfinite_safe(v.z)) + } + if (!isfinite_safe(v.z)) { v.z = 0.0f; + } return v; } +/* Triangle */ + +ccl_device_inline float triangle_area(ccl_private const float3 &v1, + ccl_private const float3 &v2, + ccl_private const float3 &v3) +{ + return len(cross(v3 - v2, v1 - v2)) * 0.5f; +} + +/* Orthonormal vectors */ + +ccl_device_inline void make_orthonormals(const float3 N, + ccl_private float3 *a, + ccl_private float3 *b) +{ +#if 0 + if (fabsf(N.y) >= 0.999f) { + *a = make_float3(1, 0, 0); + *b = make_float3(0, 0, 1); + return; + } + if (fabsf(N.z) >= 0.999f) { + *a = make_float3(1, 0, 0); + *b = make_float3(0, 1, 0); + return; + } +#endif + + if (N.x != N.y || N.x != N.z) { + *a = make_float3(N.z - N.y, N.x - N.z, N.y - N.x); //(1,1,1)x N + } + else { + *a = make_float3(N.z - N.y, N.x + N.z, -N.y - N.x); //(-1,1,1)x N + } + + *a = normalize(*a); + *b = cross(N, *a); +} + +/* Rotation of point around axis and angle */ + +ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle) +{ + float costheta = cosf(angle); + float sintheta = sinf(angle); + float3 r; + + r.x = ((costheta + (1 - costheta) * axis.x * axis.x) * p.x) + + (((1 - costheta) * axis.x * axis.y - axis.z * sintheta) * p.y) + + (((1 - costheta) * axis.x * axis.z + axis.y * sintheta) * p.z); + + r.y = (((1 - costheta) * axis.x * axis.y + axis.z * sintheta) * p.x) + + ((costheta + (1 - costheta) * axis.y * axis.y) * p.y) + + (((1 - costheta) * axis.y * axis.z - axis.x * sintheta) * p.z); + + r.z = (((1 - costheta) * axis.x * axis.z - axis.y * sintheta) * p.x) + + (((1 - costheta) * axis.y * axis.z + axis.x * sintheta) * p.y) + + ((costheta + (1 - costheta) * axis.z * axis.z) * p.z); + + return r; +} + +/* Calculate the angle between the two vectors a and b. + * The usual approach `acos(dot(a, b))` has severe precision issues for small angles, + * which are avoided by this method. + * Based on "Mangled Angles" from https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf + */ +ccl_device_inline float precise_angle(float3 a, float3 b) +{ + return 2.0f * atan2f(len(a - b), len(a + b)); +} + +/* Tangent of the angle between vectors a and b. */ +ccl_device_inline float tan_angle(float3 a, float3 b) +{ + return len(cross(a, b)) / dot(a, b); +} + +/* projections */ +ccl_device_inline float2 map_to_tube(const float3 co) +{ + float len, u, v; + len = sqrtf(co.x * co.x + co.y * co.y); + if (len > 0.0f) { + u = (1.0f - (atan2f(co.x / len, co.y / len) / M_PI_F)) * 0.5f; + v = (co.z + 1.0f) * 0.5f; + } + else { + u = v = 0.0f; + } + return make_float2(u, v); +} + +ccl_device_inline float2 map_to_sphere(const float3 co) +{ + float l = dot(co, co); + float u, v; + if (l > 0.0f) { + if (UNLIKELY(co.x == 0.0f && co.y == 0.0f)) { + u = 0.0f; /* Otherwise domain error. */ + } + else { + u = (0.5f - atan2f(co.x, co.y) * M_1_2PI_F); + } + v = 1.0f - safe_acosf(co.z / sqrtf(l)) * M_1_PI_F; + } + else { + u = v = 0.0f; + } + return make_float2(u, v); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/math_float4.h b/intern/cycles/util/math_float4.h index 1e9177aa216..c18640ea0d9 100644 --- a/intern/cycles/util/math_float4.h +++ b/intern/cycles/util/math_float4.h @@ -5,9 +5,8 @@ #pragma once -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/math_base.h" +#include "util/types_float4.h" CCL_NAMESPACE_BEGIN @@ -244,41 +243,40 @@ ccl_device_inline float4 msub(const float4 a, const float4 b, const float4 c) } #ifdef __KERNEL_SSE__ -template -__forceinline const float4 shuffle(const float4 b) +template __forceinline float4 shuffle(const float4 a) { # ifdef __KERNEL_NEON__ - return float4(shuffle_neon(b.m128)); + return float4(shuffle_neon(a.m128)); # else return float4( - _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(b), _MM_SHUFFLE(i3, i2, i1, i0)))); + _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(a), _MM_SHUFFLE(i3, i2, i1, i0)))); # endif } -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 a) +template<> __forceinline float4 shuffle<0, 1, 0, 1>(const float4 a) { return float4(_mm_movelh_ps(a, a)); } -template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 a) +template<> __forceinline float4 shuffle<2, 3, 2, 3>(const float4 a) { return float4(_mm_movehl_ps(a, a)); } # ifdef __KERNEL_SSE3__ -template<> __forceinline const float4 shuffle<0, 0, 2, 2>(const float4 b) +template<> __forceinline float4 shuffle<0, 0, 2, 2>(const float4 a) { - return float4(_mm_moveldup_ps(b)); + return float4(_mm_moveldup_ps(a)); } -template<> __forceinline const float4 shuffle<1, 1, 3, 3>(const float4 b) +template<> __forceinline float4 shuffle<1, 1, 3, 3>(const float4 a) { - return float4(_mm_movehdup_ps(b)); + return float4(_mm_movehdup_ps(a)); } # endif /* __KERNEL_SSE3__ */ template -__forceinline const float4 shuffle(const float4 a, const float4 b) +__forceinline float4 shuffle(const float4 a, const float4 b) { # ifdef __KERNEL_NEON__ return float4(shuffle_neon(a, b)); @@ -287,11 +285,11 @@ __forceinline const float4 shuffle(const float4 a, const float4 b) # endif } -template __forceinline const float4 shuffle(const float4 b) +template __forceinline float4 shuffle(const float4 a) { - return shuffle(b); + return shuffle(a); } -template __forceinline const float4 shuffle(const float4 a, const float4 b) +template __forceinline float4 shuffle(const float4 a, const float4 b) { # ifdef __KERNEL_NEON__ return float4(shuffle_neon(a, b)); @@ -300,12 +298,12 @@ template __forceinline const float4 shuffle(const float4 a, const flo # endif } -template<> __forceinline const float4 shuffle<0, 1, 0, 1>(const float4 a, const float4 b) +template<> __forceinline float4 shuffle<0, 1, 0, 1>(const float4 a, const float4 b) { return float4(_mm_movelh_ps(a, b)); } -template<> __forceinline const float4 shuffle<2, 3, 2, 3>(const float4 a, const float4 b) +template<> __forceinline float4 shuffle<2, 3, 2, 3>(const float4 a, const float4 b) { return float4(_mm_movehl_ps(b, a)); } @@ -592,14 +590,18 @@ ccl_device_inline bool isfinite_safe(float4 v) ccl_device_inline float4 ensure_finite(float4 v) { - if (!isfinite_safe(v.x)) + if (!isfinite_safe(v.x)) { v.x = 0.0f; - if (!isfinite_safe(v.y)) + } + if (!isfinite_safe(v.y)) { v.y = 0.0f; - if (!isfinite_safe(v.z)) + } + if (!isfinite_safe(v.z)) { v.z = 0.0f; - if (!isfinite_safe(v.w)) + } + if (!isfinite_safe(v.w)) { v.w = 0.0f; + } return v; } @@ -609,4 +611,27 @@ ccl_device_inline float4 power(float4 v, float e) return make_float4(powf(v.x, e), powf(v.y, e), powf(v.z, e), powf(v.w, e)); } +#if !defined(__KERNEL_METAL__) && !defined(__KERNEL_ONEAPI__) +/* Int/Float conversion */ +ccl_device_inline int4 __float4_as_int4(float4 f) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_castps_si128(f.m128)); +# else + return make_int4( + __float_as_int(f.x), __float_as_int(f.y), __float_as_int(f.z), __float_as_int(f.w)); +# endif +} + +ccl_device_inline float4 __int4_as_float4(int4 i) +{ +# ifdef __KERNEL_SSE__ + return float4(_mm_castsi128_ps(i.m128)); +# else + return make_float4( + __int_as_float(i.x), __int_as_float(i.y), __int_as_float(i.z), __int_as_float(i.w)); +# endif +} +#endif /* !defined(__KERNEL_METAL__) && !defined(__KERNEL_ONEAPI__) */ + CCL_NAMESPACE_END diff --git a/intern/cycles/util/math_float8.h b/intern/cycles/util/math_float8.h index 6ee7abd4732..f537bd9aaf0 100644 --- a/intern/cycles/util/math_float8.h +++ b/intern/cycles/util/math_float8.h @@ -5,9 +5,9 @@ #pragma once -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/math_base.h" +#include "util/types_float8.h" +#include "util/types_int8.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/math_int2.h b/intern/cycles/util/math_int2.h index 07034d2893a..a90f839ce9f 100644 --- a/intern/cycles/util/math_int2.h +++ b/intern/cycles/util/math_int2.h @@ -4,9 +4,8 @@ #pragma once -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/defines.h" +#include "util/types_int2.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/math_int3.h b/intern/cycles/util/math_int3.h index bd2d606b1a9..31cb26f1e8e 100644 --- a/intern/cycles/util/math_int3.h +++ b/intern/cycles/util/math_int3.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_int3.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/math_int4.h b/intern/cycles/util/math_int4.h index fb5473fa20b..7bc0b037c9f 100644 --- a/intern/cycles/util/math_int4.h +++ b/intern/cycles/util/math_int4.h @@ -5,9 +5,8 @@ #pragma once -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_float4.h" +#include "util/types_int4.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/math_int8.h b/intern/cycles/util/math_int8.h index 52246bf8559..83f44113a89 100644 --- a/intern/cycles/util/math_int8.h +++ b/intern/cycles/util/math_int8.h @@ -5,9 +5,9 @@ #pragma once -#ifndef __UTIL_MATH_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/math_base.h" +#include "util/types_float8.h" +#include "util/types_int8.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/math_intersect.h b/intern/cycles/util/math_intersect.h index f85708fc4c2..710d7b40d79 100644 --- a/intern/cycles/util/math_intersect.h +++ b/intern/cycles/util/math_intersect.h @@ -4,6 +4,10 @@ #pragma once +#include "util/math_float2.h" +#include "util/math_float3.h" +#include "util/math_float4.h" + CCL_NAMESPACE_BEGIN /* Ray Intersection */ @@ -287,16 +291,20 @@ ccl_device bool ray_quad_intersect(float3 ray_P, } /* Store the result. */ /* TODO(sergey): Check whether we can avoid some checks here. */ - if (isect_P != nullptr) + if (isect_P != nullptr) { *isect_P = hit; - if (isect_t != nullptr) + } + if (isect_t != nullptr) { *isect_t = t; + } /* NOTE: Return barycentric coordinates in the same notation as Embree and OptiX. */ - if (isect_u != nullptr) + if (isect_u != nullptr) { *isect_u = v + 0.5f; - if (isect_v != nullptr) + } + if (isect_v != nullptr) { *isect_v = -u - v; + } return true; } diff --git a/intern/cycles/util/math_matrix.h b/intern/cycles/util/math_matrix.h index f3821208657..b88e084b9b8 100644 --- a/intern/cycles/util/math_matrix.h +++ b/intern/cycles/util/math_matrix.h @@ -4,6 +4,11 @@ #pragma once +#include "util/atomic.h" +#include "util/math_base.h" +#include "util/math_float3.h" +#include "util/math_float4.h" + CCL_NAMESPACE_BEGIN #define MAT(A, size, row, col) A[(row) * (size) + (col)] @@ -92,7 +97,7 @@ ccl_device_inline void math_vec3_add(ccl_private float3 *v, int n, ccl_private f } ccl_device_inline void math_vec3_add_strided( - ccl_global float3 *v, int n, ccl_private float *x, float3 w, int stride) + ccl_global float3 *v, int n, ccl_private const float *x, float3 w, int stride) { for (int i = 0; i < n; i++) { ccl_global float *elem = (ccl_global float *)(v + i * stride); @@ -215,16 +220,18 @@ ccl_device_inline void math_trimatrix_vec3_solve(ccl_global float *A, /* Use forward substitution to solve L*b = y, replacing y by b. */ for (int row = 0; row < n; row++) { float3 sum = VECS(y, row, stride); - for (int col = 0; col < row; col++) + for (int col = 0; col < row; col++) { sum -= MATHS(A, row, col, stride) * VECS(y, col, stride); + } VECS(y, row, stride) = sum / MATHS(A, row, row, stride); } /* Use backward substitution to solve Lt*S = b, replacing b by S. */ for (int row = n - 1; row >= 0; row--) { float3 sum = VECS(y, row, stride); - for (int col = row + 1; col < n; col++) + for (int col = row + 1; col < n; col++) { sum -= MATHS(A, col, row, stride) * VECS(y, col, stride); + } VECS(y, row, stride) = sum / MATHS(A, row, row, stride); } } @@ -430,7 +437,7 @@ ccl_device_inline void math_matrix_hsum(float *A, int n, const float4 *ccl_restr { for (int row = 0; row < n; row++) { for (int col = 0; col <= row; col++) { - MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col))[0]; + MAT(A, n, row, col) = reduce_add(MAT(B, n, row, col)); } } } diff --git a/intern/cycles/util/projection_inverse.h b/intern/cycles/util/projection_inverse.h index 8d401ced2f5..e588d98b206 100644 --- a/intern/cycles/util/projection_inverse.h +++ b/intern/cycles/util/projection_inverse.h @@ -4,6 +4,8 @@ #pragma once +#include "util/defines.h" + CCL_NAMESPACE_BEGIN ccl_device_forceinline bool projection_inverse_impl(ccl_private float R[4][4], diff --git a/intern/cycles/util/rect.h b/intern/cycles/util/rect.h index bb646ffba73..dbaeb8831ae 100644 --- a/intern/cycles/util/rect.h +++ b/intern/cycles/util/rect.h @@ -4,7 +4,8 @@ #pragma once -#include "util/types.h" +#include "util/math_base.h" +#include "util/types_int4.h" CCL_NAMESPACE_BEGIN diff --git a/intern/cycles/util/stack_allocator.h b/intern/cycles/util/stack_allocator.h index 0c1736793c6..ad12aebb4ab 100644 --- a/intern/cycles/util/stack_allocator.h +++ b/intern/cycles/util/stack_allocator.h @@ -10,7 +10,8 @@ CCL_NAMESPACE_BEGIN /* Stack allocator for the use with STL. */ -template class ccl_try_align(16) StackAllocator { +template class ccl_try_align(16) StackAllocator +{ public: typedef size_t size_type; typedef ptrdiff_t difference_type; @@ -58,7 +59,7 @@ template class ccl_try_align(16) StackAllocator { return mem; } - void deallocate(T *p, size_t n) + void deallocate(T * p, size_t n) { if (p == nullptr) { return; @@ -77,7 +78,7 @@ template class ccl_try_align(16) StackAllocator { /* Address of an reference. */ - T *address(T &x) const + T *address(T & x) const { return &x; } @@ -89,14 +90,14 @@ template class ccl_try_align(16) StackAllocator { /* Object construction/destruction. */ - void construct(T *p, const T &val) + void construct(T * p, const T &val) { if (p != nullptr) { new ((T *)p) T(val); } } - void destroy(T *p) + void destroy(T * p) { p->~T(); } diff --git a/intern/cycles/util/types.h b/intern/cycles/util/types.h index ed0c8ef748b..e4ce7444874 100644 --- a/intern/cycles/util/types.h +++ b/intern/cycles/util/types.h @@ -2,134 +2,28 @@ * * SPDX-License-Identifier: Apache-2.0 */ -#ifndef __UTIL_TYPES_H__ -#define __UTIL_TYPES_H__ +#pragma once -#if !defined(__KERNEL_METAL__) -# include -#endif +#include "util/types_base.h" // IWYU pragma: export -/* Standard Integer Types */ +#include "util/types_uchar2.h" // IWYU pragma: export +#include "util/types_uchar3.h" // IWYU pragma: export +#include "util/types_uchar4.h" // IWYU pragma: export -#if !defined(__KERNEL_GPU__) -# include -# include -#endif +#include "util/types_int2.h" // IWYU pragma: export +#include "util/types_int3.h" // IWYU pragma: export +#include "util/types_int4.h" // IWYU pragma: export +#include "util/types_int8.h" // IWYU pragma: export -#include "util/defines.h" +#include "util/types_uint2.h" // IWYU pragma: export +#include "util/types_uint3.h" // IWYU pragma: export +#include "util/types_uint4.h" // IWYU pragma: export -#ifndef __KERNEL_GPU__ -# include "util/optimization.h" -# include "util/simd.h" -#endif +#include "util/types_ushort4.h" // IWYU pragma: export -CCL_NAMESPACE_BEGIN +#include "util/types_float2.h" // IWYU pragma: export +#include "util/types_float3.h" // IWYU pragma: export +#include "util/types_float4.h" // IWYU pragma: export +#include "util/types_float8.h" // IWYU pragma: export -/* Types - * - * Define simpler unsigned type names, and integer with defined number of bits. - * Also vector types, named to be compatible with OpenCL builtin types, while - * working for CUDA and C++ too. */ - -/* Shorter Unsigned Names */ - -typedef unsigned char uchar; -typedef unsigned int uint; -typedef unsigned short ushort; - -/* Fixed Bits Types */ - -#ifndef __KERNEL_GPU__ -/* Generic Memory Pointer */ - -typedef uint64_t device_ptr; -#endif /* __KERNEL_GPU__ */ - -ccl_device_inline size_t align_up(size_t offset, size_t alignment) -{ - return (offset + alignment - 1) & ~(alignment - 1); -} - -ccl_device_inline size_t divide_up(size_t x, size_t y) -{ - return (x + y - 1) / y; -} - -ccl_device_inline size_t round_up(size_t x, size_t multiple) -{ - return ((x + multiple - 1) / multiple) * multiple; -} - -ccl_device_inline size_t round_down(size_t x, size_t multiple) -{ - return (x / multiple) * multiple; -} - -ccl_device_inline bool is_power_of_two(size_t x) -{ - return (x & (x - 1)) == 0; -} - -CCL_NAMESPACE_END - -/* Device side printf only tested on CUDA, may work on more GPU devices. */ -#if !defined(__KERNEL_GPU__) || defined(__KERNEL_CUDA__) -# define __KERNEL_PRINTF__ -#endif - -ccl_device_inline void print_float(ccl_private const char *label, const float a) -{ -#ifdef __KERNEL_PRINTF__ - printf("%s: %.8f\n", label, (double)a); -#endif -} - -/* Most GPU APIs matching native vector types, so we only need to implement them for - * CPU and oneAPI. */ -#if defined(__KERNEL_GPU__) && !defined(__KERNEL_ONEAPI__) -# define __KERNEL_NATIVE_VECTOR_TYPES__ -#endif - -/* Vectorized types declaration. */ -#include "util/types_uchar2.h" -#include "util/types_uchar3.h" -#include "util/types_uchar4.h" - -#include "util/types_int2.h" -#include "util/types_int3.h" -#include "util/types_int4.h" -#include "util/types_int8.h" - -#include "util/types_uint2.h" -#include "util/types_uint3.h" -#include "util/types_uint4.h" - -#include "util/types_ushort4.h" - -#include "util/types_float2.h" -#include "util/types_float3.h" -#include "util/types_float4.h" -#include "util/types_float8.h" - -#include "util/types_spectrum.h" - -/* Vectorized types implementation. */ -#include "util/types_uchar2_impl.h" -#include "util/types_uchar3_impl.h" -#include "util/types_uchar4_impl.h" - -#include "util/types_int2_impl.h" -#include "util/types_int3_impl.h" -#include "util/types_int4_impl.h" -#include "util/types_int8_impl.h" - -#include "util/types_uint2_impl.h" -#include "util/types_uint3_impl.h" -#include "util/types_uint4_impl.h" - -#include "util/types_float2_impl.h" -#include "util/types_float3_impl.h" -#include "util/types_float4_impl.h" -#include "util/types_float8_impl.h" - -#endif /* __UTIL_TYPES_H__ */ +#include "util/types_spectrum.h" // IWYU pragma: export diff --git a/intern/cycles/util/types_base.h b/intern/cycles/util/types_base.h new file mode 100644 index 00000000000..ab466a873cd --- /dev/null +++ b/intern/cycles/util/types_base.h @@ -0,0 +1,90 @@ +/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation + * + * SPDX-License-Identifier: Apache-2.0 */ + +#pragma once + +#if !defined(__KERNEL_METAL__) +# include +#endif + +/* Standard Integer Types */ + +#if !defined(__KERNEL_GPU__) +# include +# include +#endif + +#include "util/defines.h" + +#ifndef __KERNEL_GPU__ +# include "util/optimization.h" +# include "util/simd.h" +#endif + +CCL_NAMESPACE_BEGIN + +/* Types + * + * Define simpler unsigned type names, and integer with defined number of bits. + * Also vector types, named to be compatible with OpenCL builtin types, while + * working for CUDA and C++ too. */ + +/* Shorter Unsigned Names */ + +using uchar = unsigned char; +using uint = unsigned int; +using ushort = unsigned short; + +/* Fixed Bits Types */ + +#ifndef __KERNEL_GPU__ +/* Generic Memory Pointer */ + +using device_ptr = uint64_t; +#endif /* __KERNEL_GPU__ */ + +ccl_device_inline size_t align_up(size_t offset, size_t alignment) +{ + return (offset + alignment - 1) & ~(alignment - 1); +} + +ccl_device_inline size_t divide_up(size_t x, size_t y) +{ + return (x + y - 1) / y; +} + +ccl_device_inline size_t round_up(size_t x, size_t multiple) +{ + return ((x + multiple - 1) / multiple) * multiple; +} + +ccl_device_inline size_t round_down(size_t x, size_t multiple) +{ + return (x / multiple) * multiple; +} + +ccl_device_inline bool is_power_of_two(size_t x) +{ + return (x & (x - 1)) == 0; +} + +CCL_NAMESPACE_END + +/* Device side printf only tested on CUDA, may work on more GPU devices. */ +#if !defined(__KERNEL_GPU__) || defined(__KERNEL_CUDA__) +# define __KERNEL_PRINTF__ +#endif + +ccl_device_inline void print_float(ccl_private const char *label, const float a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: %.8f\n", label, (double)a); +#endif +} + +/* Most GPU APIs matching native vector types, so we only need to implement them for + * CPU and oneAPI. */ +#if defined(__KERNEL_GPU__) && !defined(__KERNEL_ONEAPI__) +# define __KERNEL_NATIVE_VECTOR_TYPES__ +#endif diff --git a/intern/cycles/util/types_float2.h b/intern/cycles/util/types_float2.h index 10ffc96f095..cbc8c111b40 100644 --- a/intern/cycles/util/types_float2.h +++ b/intern/cycles/util/types_float2.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" CCL_NAMESPACE_BEGIN @@ -20,10 +18,34 @@ struct float2 { # endif }; -ccl_device_inline float2 make_float2(float x, float y); +# ifndef __KERNEL_GPU__ +__forceinline float float2::operator[](int i) const +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} + +__forceinline float &float2::operator[](int i) +{ + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); +} +# endif + +ccl_device_inline float2 make_float2(float x, float y) +{ + float2 a = {x, y}; + return a; +} #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ -ccl_device_inline float2 make_float2(float3 a); -ccl_device_inline void print_float2(ccl_private const char *label, const float2 a); +ccl_device_inline void print_float2(ccl_private const char *label, const float2 a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); +#endif +} CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float2_impl.h b/intern/cycles/util/types_float2_impl.h deleted file mode 100644 index 7da9ca14abf..00000000000 --- a/intern/cycles/util/types_float2_impl.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifndef __KERNEL_GPU__ -__forceinline float float2::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -__forceinline float &float2::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} -# endif - -ccl_device_inline float2 make_float2(float x, float y) -{ - float2 a = {x, y}; - return a; -} -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -ccl_device_inline float2 make_float2(float3 a) -{ - return make_float2(a.x, a.y); -} - -ccl_device_inline void print_float2(ccl_private const char *label, const float2 a) -{ -#ifdef __KERNEL_PRINTF__ - printf("%s: %.8f %.8f\n", label, (double)a.x, (double)a.y); -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h index f90a4cc1ddf..78ad86b406d 100644 --- a/intern/cycles/util/types_float3.h +++ b/intern/cycles/util/types_float3.h @@ -4,9 +4,9 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" +#include "util/types_float2.h" +#include "util/types_int4.h" CCL_NAMESPACE_BEGIN @@ -37,33 +37,107 @@ struct ccl_try_align(16) float3 # ifdef __KERNEL_SSE__ /* Convenient constructors and operators for SIMD, otherwise default is enough. */ - __forceinline float3(); - __forceinline float3(const float3 &a); - __forceinline explicit float3(const __m128 &a); + __forceinline float3() = default; + __forceinline float3(const float3 &a) = default; + __forceinline explicit float3(const __m128 &a) : m128(a) {} - __forceinline operator const __m128 &() const; - __forceinline operator __m128 &(); + __forceinline operator const __m128 &() const + { + return m128; + } + __forceinline operator __m128 &() + { + return m128; + } - __forceinline float3 &operator=(const float3 &a); + __forceinline float3 &operator=(const float3 &a) + { + m128 = a.m128; + return *this; + } # endif # ifndef __KERNEL_GPU__ - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); + __forceinline float operator[](int i) const + { + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); + } + __forceinline float &operator[](int i) + { + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); + } # endif }; -ccl_device_inline float3 make_float3(float x, float y, float z); +ccl_device_inline float3 make_float3(float x, float y, float z) +{ +# if defined(__KERNEL_GPU__) + return {x, y, z}; +# elif defined(__KERNEL_SSE__) + return float3(_mm_set_ps(0.0f, z, y, x)); +# else + return {x, y, z, 0.0f}; +# endif +} + #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ -ccl_device_inline float3 make_float3(float f); -ccl_device_inline float3 make_float3(float4 a); -ccl_device_inline float3 make_float3(float2 a); -ccl_device_inline float3 make_float3(float2 a, float b); -ccl_device_inline void print_float3(ccl_private const char *label, const float3 a); +ccl_device_inline float3 make_float3(float f) +{ +#if defined(__KERNEL_GPU__) + return make_float3(f, f, f); +#elif defined(__KERNEL_SSE__) + return float3(_mm_set1_ps(f)); +#else + return {f, f, f, f}; +#endif +} -/* Smaller float3 for storage. For math operations this must be converted to float3, so that on the +ccl_device_inline float3 make_float3(float2 a) +{ + return make_float3(a.x, a.y, 0.0f); +} + +ccl_device_inline float3 make_float3(float2 a, float b) +{ + return make_float3(a.x, a.y, b); +} + +ccl_device_inline void print_float3(ccl_private const char *label, const float3 a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); +#else + (void)label; + (void)a; +#endif +} + +ccl_device_inline float2 make_float2(float3 a) +{ + return make_float2(a.x, a.y); +} + +ccl_device_inline int4 make_int4(const float3 f) +{ +#if defined(__KERNEL_GPU__) + return make_int4((int)f.x, (int)f.y, (int)f.z, 0); +#elif defined(__KERNEL_SSE__) + return int4(_mm_cvtps_epi32(f.m128)); +#else + return make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w); +#endif +} + +/* Packed float3 + * + * Smaller float3 for storage. For math operations this must be converted to float3, so that on the * CPU SIMD instructions can be used. */ + #if defined(__KERNEL_METAL__) /* Metal has native packed_float3. */ #elif defined(__KERNEL_CUDA__) || defined(__KERNEL_HIP__) || defined(__KERNEL_ONEAPI__) @@ -71,7 +145,7 @@ ccl_device_inline void print_float3(ccl_private const char *label, const float3 typedef float3 packed_float3; #else struct packed_float3 { - ccl_device_inline_method packed_float3(){}; + ccl_device_inline_method packed_float3() = default; ccl_device_inline_method packed_float3(const float3 &a) : x(a.x), y(a.y), z(a.z) {} diff --git a/intern/cycles/util/types_float3_impl.h b/intern/cycles/util/types_float3_impl.h deleted file mode 100644 index dc9cd517665..00000000000 --- a/intern/cycles/util/types_float3_impl.h +++ /dev/null @@ -1,100 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifdef __KERNEL_SSE__ -__forceinline float3::float3() {} - -__forceinline float3::float3(const float3 &a) : m128(a.m128) {} - -__forceinline float3::float3(const __m128 &a) : m128(a) {} - -__forceinline float3::operator const __m128 &() const -{ - return m128; -} - -__forceinline float3::operator __m128 &() -{ - return m128; -} - -__forceinline float3 &float3::operator=(const float3 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -# ifndef __KERNEL_GPU__ -__forceinline float float3::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -__forceinline float &float3::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} -# endif - -ccl_device_inline float3 make_float3(float x, float y, float z) -{ -# if defined(__KERNEL_GPU__) - return {x, y, z}; -# elif defined(__KERNEL_SSE__) - return float3(_mm_set_ps(0.0f, z, y, x)); -# else - return {x, y, z, 0.0f}; -# endif -} - -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -ccl_device_inline float3 make_float3(float f) -{ -#if defined(__KERNEL_GPU__) - return make_float3(f, f, f); -#elif defined(__KERNEL_SSE__) - return float3(_mm_set1_ps(f)); -#else - return {f, f, f, f}; -#endif -} - -ccl_device_inline float3 make_float3(float4 a) -{ - return make_float3(a.x, a.y, a.z); -} - -ccl_device_inline float3 make_float3(float2 a) -{ - return make_float3(a.x, a.y, 0.0f); -} - -ccl_device_inline float3 make_float3(float2 a, float b) -{ - return make_float3(a.x, a.y, b); -} - -ccl_device_inline void print_float3(ccl_private const char *label, const float3 a) -{ -#ifdef __KERNEL_PRINTF__ - printf("%s: %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z); -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float4.h b/intern/cycles/util/types_float4.h index b38b2aab96e..e487200d827 100644 --- a/intern/cycles/util/types_float4.h +++ b/intern/cycles/util/types_float4.h @@ -4,9 +4,9 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" +#include "util/types_float3.h" +#include "util/types_int4.h" CCL_NAMESPACE_BEGIN @@ -23,30 +23,103 @@ struct ccl_try_align(16) float4 }; }; - __forceinline float4(); - __forceinline explicit float4(const __m128 &a); + __forceinline float4() = default; + __forceinline float4(const float4 &a) = default; + __forceinline explicit float4(const __m128 &a) : m128(a) {} - __forceinline operator const __m128 &() const; - __forceinline operator __m128 &(); + __forceinline operator const __m128 &() const + { + return m128; + } + __forceinline operator __m128 &() + { + return m128; + } - __forceinline float4 &operator=(const float4 &a); + __forceinline float4 &operator=(const float4 &a) + { + m128 = a.m128; + return *this; + } # else /* __KERNEL_SSE__ */ float x, y, z, w; # endif /* __KERNEL_SSE__ */ # ifndef __KERNEL_GPU__ - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); + __forceinline float operator[](int i) const + { + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); + } + __forceinline float &operator[](int i) + { + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); + } # endif }; -ccl_device_inline float4 make_float4(float x, float y, float z, float w); +ccl_device_inline float4 make_float4(float x, float y, float z, float w) +{ +# ifdef __KERNEL_SSE__ + return float4(_mm_set_ps(w, z, y, x)); +# else + return {x, y, z, w}; +# endif +} + #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ -ccl_device_inline float4 make_float4(float f); -ccl_device_inline float4 make_float4(float3 a, float b); -ccl_device_inline float4 make_float4(const int4 i); -ccl_device_inline void print_float4(ccl_private const char *label, const float4 a); +ccl_device_inline float4 make_float4(float f) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_set1_ps(f)); +#else + return make_float4(f, f, f, f); +#endif +} + +ccl_device_inline float4 make_float4(float3 a, float b) +{ + return make_float4(a.x, a.y, a.z, b); +} + +ccl_device_inline float4 make_float4(float3 a) +{ + return make_float4(a.x, a.y, a.z, 1.0f); +} + +ccl_device_inline float4 make_float4(const int4 i) +{ +#ifdef __KERNEL_SSE__ + return float4(_mm_cvtepi32_ps(i.m128)); +#else + return make_float4((float)i.x, (float)i.y, (float)i.z, (float)i.w); +#endif +} + +ccl_device_inline float3 make_float3(float4 a) +{ + return make_float3(a.x, a.y, a.z); +} + +ccl_device_inline int4 make_int4(const float4 f) +{ +#ifdef __KERNEL_SSE__ + return int4(_mm_cvtps_epi32(f.m128)); +#else + return make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w); +#endif +} + +ccl_device_inline void print_float4(ccl_private const char *label, const float4 a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w); +#endif +} CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float4_impl.h b/intern/cycles/util/types_float4_impl.h deleted file mode 100644 index 29a7644c97b..00000000000 --- a/intern/cycles/util/types_float4_impl.h +++ /dev/null @@ -1,98 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifdef __KERNEL_SSE__ -__forceinline float4::float4() {} - -__forceinline float4::float4(const __m128 &a) : m128(a) {} - -__forceinline float4::operator const __m128 &() const -{ - return m128; -} - -__forceinline float4::operator __m128 &() -{ - return m128; -} - -__forceinline float4 &float4::operator=(const float4 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -# ifndef __KERNEL_GPU__ -__forceinline float float4::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -__forceinline float &float4::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} -# endif - -ccl_device_inline float4 make_float4(float x, float y, float z, float w) -{ -# ifdef __KERNEL_SSE__ - return float4(_mm_set_ps(w, z, y, x)); -# else - return {x, y, z, w}; -# endif -} - -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -ccl_device_inline float4 make_float4(float f) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_set1_ps(f)); -#else - return make_float4(f, f, f, f); -#endif -} - -ccl_device_inline float4 make_float4(float3 a, float b) -{ - return make_float4(a.x, a.y, a.z, b); -} - -ccl_device_inline float4 make_float4(float3 a) -{ - return make_float4(a.x, a.y, a.z, 1.0f); -} - -ccl_device_inline float4 make_float4(const int4 i) -{ -#ifdef __KERNEL_SSE__ - return float4(_mm_cvtepi32_ps(i.m128)); -#else - return make_float4((float)i.x, (float)i.y, (float)i.z, (float)i.w); -#endif -} - -ccl_device_inline void print_float4(ccl_private const char *label, const float4 a) -{ -#ifdef __KERNEL_PRINTF__ - printf("%s: %.8f %.8f %.8f %.8f\n", label, (double)a.x, (double)a.y, (double)a.z, (double)a.w); -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float8.h b/intern/cycles/util/types_float8.h index 1e893d9ddf0..e8eda2d435a 100644 --- a/intern/cycles/util/types_float8.h +++ b/intern/cycles/util/types_float8.h @@ -7,9 +7,9 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" +#include "util/types_float4.h" +#include "util/types_int8.h" CCL_NAMESPACE_BEGIN @@ -30,30 +30,99 @@ struct ccl_try_align(32) vfloat8 }; }; - __forceinline vfloat8(); - __forceinline vfloat8(const vfloat8 &a); - __forceinline explicit vfloat8(const __m256 &a); + __forceinline vfloat8() = default; + __forceinline vfloat8(const vfloat8 &a) = default; + __forceinline explicit vfloat8(const __m256 &a) : m256(a) {} - __forceinline operator const __m256 &() const; - __forceinline operator __m256 &(); + __forceinline operator const __m256 &() const + { + return m256; + } + __forceinline operator __m256 &() + { + return m256; + } - __forceinline vfloat8 &operator=(const vfloat8 &a); + __forceinline vfloat8 &operator=(const vfloat8 &a) + { + m256 = a.m256; + return *this; + } #else /* __KERNEL_AVX__ */ float a, b, c, d, e, f, g, h; #endif /* __KERNEL_AVX__ */ #ifndef __KERNEL_GPU__ - __forceinline float operator[](int i) const; - __forceinline float &operator[](int i); + __forceinline float operator[](int i) const + { + util_assert(i >= 0); + util_assert(i < 8); + return *(&a + i); + } + __forceinline float &operator[](int i) + { + util_assert(i >= 0); + util_assert(i < 8); + return *(&a + i); + } #endif }; -ccl_device_inline vfloat8 make_vfloat8(float f); -ccl_device_inline vfloat8 -make_vfloat8(float a, float b, float c, float d, float e, float f, float g, float h); -ccl_device_inline vfloat8 make_vfloat8(const float4 a, const float4 b); +ccl_device_inline vfloat8 make_vfloat8(float f) +{ +#ifdef __KERNEL_AVX__ + vfloat8 r(_mm256_set1_ps(f)); +#else + vfloat8 r = {f, f, f, f, f, f, f, f}; +#endif + return r; +} -ccl_device_inline void print_vfloat8(ccl_private const char *label, const vfloat8 a); +ccl_device_inline vfloat8 +make_vfloat8(float a, float b, float c, float d, float e, float f, float g, float h) +{ +#ifdef __KERNEL_AVX__ + vfloat8 r(_mm256_setr_ps(a, b, c, d, e, f, g, h)); +#else + vfloat8 r = {a, b, c, d, e, f, g, h}; +#endif + return r; +} + +ccl_device_inline vfloat8 make_vfloat8(const float4 a, const float4 b) +{ +#ifdef __KERNEL_AVX__ + return vfloat8(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1)); +#else + return make_vfloat8(a.x, a.y, a.z, a.w, b.x, b.y, b.z, b.w); +#endif +} + +ccl_device_inline void print_vfloat8(ccl_private const char *label, const vfloat8 a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: %.8f %.8f %.8f %.8f %.8f %.8f %.8f %.8f\n", + label, + (double)a.a, + (double)a.b, + (double)a.c, + (double)a.d, + (double)a.e, + (double)a.f, + (double)a.g, + (double)a.h); +#endif +} + +ccl_device_inline vint8 make_vint8(const vfloat8 f) +{ +#ifdef __KERNEL_AVX__ + return vint8(_mm256_cvtps_epi32(f.m256)); +#else + return make_vint8( + (int)f.a, (int)f.b, (int)f.c, (int)f.d, (int)f.e, (int)f.f, (int)f.g, (int)f.h); +#endif +} CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_float8_impl.h b/intern/cycles/util/types_float8_impl.h deleted file mode 100644 index 7ffbe7e40ea..00000000000 --- a/intern/cycles/util/types_float8_impl.h +++ /dev/null @@ -1,102 +0,0 @@ -/* SPDX-FileCopyrightText: 2017 Intel Corporation - * SPDX-FileCopyrightText: 2018-2022 Blender Foundation - * - * SPDX-License-Identifier: BSD-3-Clause - * - * Originally by Intel Corporation, modified by the Blender Foundation. */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifdef __KERNEL_AVX__ -__forceinline vfloat8::vfloat8() {} - -__forceinline vfloat8::vfloat8(const vfloat8 &f) : m256(f.m256) {} - -__forceinline vfloat8::vfloat8(const __m256 &f) : m256(f) {} - -__forceinline vfloat8::operator const __m256 &() const -{ - return m256; -} - -__forceinline vfloat8::operator __m256 &() -{ - return m256; -} - -__forceinline vfloat8 &vfloat8::operator=(const vfloat8 &f) -{ - m256 = f.m256; - return *this; -} -#endif /* __KERNEL_AVX__ */ - -#ifndef __KERNEL_GPU__ -__forceinline float vfloat8::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 8); - return *(&a + i); -} - -__forceinline float &vfloat8::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 8); - return *(&a + i); -} -#endif - -ccl_device_inline vfloat8 make_vfloat8(float f) -{ -#ifdef __KERNEL_AVX__ - vfloat8 r(_mm256_set1_ps(f)); -#else - vfloat8 r = {f, f, f, f, f, f, f, f}; -#endif - return r; -} - -ccl_device_inline vfloat8 -make_vfloat8(float a, float b, float c, float d, float e, float f, float g, float h) -{ -#ifdef __KERNEL_AVX__ - vfloat8 r(_mm256_setr_ps(a, b, c, d, e, f, g, h)); -#else - vfloat8 r = {a, b, c, d, e, f, g, h}; -#endif - return r; -} - -ccl_device_inline vfloat8 make_vfloat8(const float4 a, const float4 b) -{ -#ifdef __KERNEL_AVX__ - return vfloat8(_mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1)); -#else - return make_vfloat8(a.x, a.y, a.z, a.w, b.x, b.y, b.z, b.w); -#endif -} - -ccl_device_inline void print_vfloat8(ccl_private const char *label, const vfloat8 a) -{ -#ifdef __KERNEL_PRINTF__ - printf("%s: %.8f %.8f %.8f %.8f %.8f %.8f %.8f %.8f\n", - label, - (double)a.a, - (double)a.b, - (double)a.c, - (double)a.d, - (double)a.e, - (double)a.f, - (double)a.g, - (double)a.h); -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int2.h b/intern/cycles/util/types_int2.h index c911bef7638..1661b3cf1d7 100644 --- a/intern/cycles/util/types_int2.h +++ b/intern/cycles/util/types_int2.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" CCL_NAMESPACE_BEGIN @@ -15,12 +13,27 @@ struct int2 { int x, y; # ifndef __KERNEL_GPU__ - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); + __forceinline int operator[](int i) const + { + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); + } + + __forceinline int &operator[](int i) + { + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); + } # endif }; -ccl_device_inline int2 make_int2(int x, int y); +ccl_device_inline int2 make_int2(int x, int y) +{ + int2 a = {x, y}; + return a; +} #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int2_impl.h b/intern/cycles/util/types_int2_impl.h deleted file mode 100644 index 0ce89caafba..00000000000 --- a/intern/cycles/util/types_int2_impl.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifndef __KERNEL_GPU__ -int int2::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -int &int2::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} -# endif - -ccl_device_inline int2 make_int2(int x, int y) -{ - int2 a = {x, y}; - return a; -} -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int3.h b/intern/cycles/util/types_int3.h index 56911ecc803..6bc6a6c986e 100644 --- a/intern/cycles/util/types_int3.h +++ b/intern/cycles/util/types_int3.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" CCL_NAMESPACE_BEGIN @@ -26,30 +24,77 @@ struct ccl_try_align(16) int3 }; }; - __forceinline int3(); - __forceinline int3(const int3 &a); - __forceinline explicit int3(const __m128i &a); + __forceinline int3() = default; + __forceinline int3(const int3 &a) = default; + __forceinline explicit int3(const __m128i &a) : m128(a) {} - __forceinline operator const __m128i &() const; - __forceinline operator __m128i &(); + __forceinline operator const __m128i &() const + { + return m128; + } - __forceinline int3 &operator=(const int3 &a); + __forceinline operator __m128i &() + { + return m128; + } + + __forceinline int3 &operator=(const int3 &a) + { + m128 = a.m128; + return *this; + } # else /* __KERNEL_SSE__ */ int x, y, z, w; # endif /* __KERNEL_SSE__ */ # endif # ifndef __KERNEL_GPU__ - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); + __forceinline int operator[](int i) const + { + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); + } + + __forceinline int &operator[](int i) + { + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); + } # endif }; -ccl_device_inline int3 make_int3(int x, int y, int z); +ccl_device_inline int3 make_int3(int x, int y, int z) +{ +# if defined(__KERNEL_GPU__) + return {x, y, z}; +# elif defined(__KERNEL_SSE__) + return int3(_mm_set_epi32(0, z, y, x)); +# else + return {x, y, z, 0}; +# endif +} + #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ -ccl_device_inline int3 make_int3(int i); -ccl_device_inline void print_int3(ccl_private const char *label, const int3 a); +ccl_device_inline int3 make_int3(int i) +{ +#if defined(__KERNEL_GPU__) + return make_int3(i, i, i); +#elif defined(__KERNEL_SSE__) + return int3(_mm_set1_epi32(i)); +#else + return {i, i, i, i}; +#endif +} + +ccl_device_inline void print_int3(ccl_private const char *label, const int3 a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: %d %d %d\n", label, a.x, a.y, a.z); +#endif +} #if defined(__KERNEL_METAL__) /* Metal has native packed_int3. */ @@ -61,7 +106,7 @@ typedef int3 packed_int3; struct packed_int3 { int x, y, z; - ccl_device_inline_method packed_int3(){}; + ccl_device_inline_method packed_int3() = default; ccl_device_inline_method packed_int3(const int px, const int py, const int pz) : x(px), y(py), z(pz){}; @@ -82,12 +127,27 @@ struct packed_int3 { } # ifndef __KERNEL_GPU__ - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); + __forceinline int operator[](int i) const + { + util_assert(i < 3); + return *(&x + i); + } + + __forceinline int &operator[](int i) + { + util_assert(i < 3); + return *(&x + i); + } # endif }; static_assert(sizeof(packed_int3) == 12, "packed_int3 expected to be exactly 12 bytes"); #endif +ccl_device_inline packed_int3 make_packed_int3(int x, int y, int z) +{ + packed_int3 a = {x, y, z}; + return a; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int3_impl.h b/intern/cycles/util/types_int3_impl.h deleted file mode 100644 index 8e2e4af52dc..00000000000 --- a/intern/cycles/util/types_int3_impl.h +++ /dev/null @@ -1,103 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifdef __KERNEL_SSE__ -__forceinline int3::int3() {} - -__forceinline int3::int3(const __m128i &a) : m128(a) {} - -__forceinline int3::int3(const int3 &a) : m128(a.m128) {} - -__forceinline int3::operator const __m128i &() const -{ - return m128; -} - -__forceinline int3::operator __m128i &() -{ - return m128; -} - -__forceinline int3 &int3::operator=(const int3 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -# ifndef __KERNEL_GPU__ -__forceinline int int3::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -__forceinline int &int3::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -__forceinline int packed_int3::operator[](int i) const -{ - util_assert(i < 3); - return *(&x + i); -} - -__forceinline int &packed_int3::operator[](int i) -{ - util_assert(i < 3); - return *(&x + i); -} -# endif - -ccl_device_inline int3 make_int3(int x, int y, int z) -{ -# if defined(__KERNEL_GPU__) - return {x, y, z}; -# elif defined(__KERNEL_SSE__) - return int3(_mm_set_epi32(0, z, y, x)); -# else - return {x, y, z, 0}; -# endif -} - -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -ccl_device_inline int3 make_int3(int i) -{ -#if defined(__KERNEL_GPU__) - return make_int3(i, i, i); -#elif defined(__KERNEL_SSE__) - return int3(_mm_set1_epi32(i)); -#else - return {i, i, i, i}; -#endif -} - -ccl_device_inline packed_int3 make_packed_int3(int x, int y, int z) -{ - packed_int3 a = {x, y, z}; - return a; -} - -ccl_device_inline void print_int3(ccl_private const char *label, const int3 a) -{ -#ifdef __KERNEL_PRINTF__ - printf("%s: %d %d %d\n", label, a.x, a.y, a.z); -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int4.h b/intern/cycles/util/types_int4.h index c38d3a439cd..786ff5ee080 100644 --- a/intern/cycles/util/types_int4.h +++ b/intern/cycles/util/types_int4.h @@ -4,17 +4,12 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" CCL_NAMESPACE_BEGIN #ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -struct float3; -struct float4; - struct ccl_try_align(16) int4 { # ifdef __KERNEL_SSE__ @@ -25,30 +20,68 @@ struct ccl_try_align(16) int4 }; }; - __forceinline int4(); - __forceinline int4(const int4 &a); - __forceinline explicit int4(const __m128i &a); + __forceinline int4() = default; + __forceinline int4(const int4 &a) = default; + __forceinline explicit int4(const __m128i &a) : m128(a) {} - __forceinline operator const __m128i &() const; - __forceinline operator __m128i &(); + __forceinline operator const __m128i &() const + { + return m128; + } + __forceinline operator __m128i &() + { + return m128; + } - __forceinline int4 &operator=(const int4 &a); + __forceinline int4 &operator=(const int4 &a) + { + m128 = a.m128; + return *this; + } # else /* __KERNEL_SSE__ */ int x, y, z, w; # endif /* __KERNEL_SSE__ */ # ifndef __KERNEL_GPU__ - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); + __forceinline int operator[](int i) const + { + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); + } + __forceinline int &operator[](int i) + { + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); + } # endif }; -ccl_device_inline int4 make_int4(int x, int y, int z, int w); +ccl_device_inline int4 make_int4(int x, int y, int z, int w) +{ +# ifdef __KERNEL_SSE__ + return int4(_mm_set_epi32(w, z, y, x)); +# else + return {x, y, z, w}; +# endif +} #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ -ccl_device_inline int4 make_int4(int i); -ccl_device_inline int4 make_int4(const float3 f); -ccl_device_inline int4 make_int4(const float4 f); -ccl_device_inline void print_int4(ccl_private const char *label, const int4 a); +ccl_device_inline int4 make_int4(int i) +{ +#ifdef __KERNEL_SSE__ + return int4(_mm_set1_epi32(i)); +#else + return make_int4(i, i, i, i); +#endif +} + +ccl_device_inline void print_int4(ccl_private const char *label, const int4 a) +{ +#ifdef __KERNEL_PRINTF__ + printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); +#endif +} CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int4_impl.h b/intern/cycles/util/types_int4_impl.h deleted file mode 100644 index b44cf630e89..00000000000 --- a/intern/cycles/util/types_int4_impl.h +++ /dev/null @@ -1,101 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifdef __KERNEL_SSE__ -__forceinline int4::int4() {} - -__forceinline int4::int4(const int4 &a) : m128(a.m128) {} - -__forceinline int4::int4(const __m128i &a) : m128(a) {} - -__forceinline int4::operator const __m128i &() const -{ - return m128; -} - -__forceinline int4::operator __m128i &() -{ - return m128; -} - -__forceinline int4 &int4::operator=(const int4 &a) -{ - m128 = a.m128; - return *this; -} -# endif /* __KERNEL_SSE__ */ - -# ifndef __KERNEL_GPU__ -__forceinline int int4::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -__forceinline int &int4::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} -# endif - -ccl_device_inline int4 make_int4(int x, int y, int z, int w) -{ -# ifdef __KERNEL_SSE__ - return int4(_mm_set_epi32(w, z, y, x)); -# else - return {x, y, z, w}; -# endif -} - -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -ccl_device_inline int4 make_int4(int i) -{ -#ifdef __KERNEL_SSE__ - return int4(_mm_set1_epi32(i)); -#else - return make_int4(i, i, i, i); -#endif -} - -ccl_device_inline int4 make_int4(const float3 f) -{ -#if defined(__KERNEL_GPU__) - return make_int4((int)f.x, (int)f.y, (int)f.z, 0); -#elif defined(__KERNEL_SSE__) - return int4(_mm_cvtps_epi32(f.m128)); -#else - return make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w); -#endif -} - -ccl_device_inline int4 make_int4(const float4 f) -{ -#ifdef __KERNEL_SSE__ - return int4(_mm_cvtps_epi32(f.m128)); -#else - return make_int4((int)f.x, (int)f.y, (int)f.z, (int)f.w); -#endif -} - -ccl_device_inline void print_int4(ccl_private const char *label, const int4 a) -{ -#ifdef __KERNEL_PRINTF__ - printf("%s: %d %d %d %d\n", label, a.x, a.y, a.z, a.w); -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int8.h b/intern/cycles/util/types_int8.h index e5c663572b5..229daa7c98c 100644 --- a/intern/cycles/util/types_int8.h +++ b/intern/cycles/util/types_int8.h @@ -4,9 +4,8 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" +#include "util/types_int4.h" CCL_NAMESPACE_BEGIN @@ -26,27 +25,69 @@ struct ccl_try_align(32) vint8 }; }; - __forceinline vint8(); - __forceinline vint8(const vint8 &a); - __forceinline explicit vint8(const __m256i &a); + __forceinline vint8() = default; + __forceinline vint8(const vint8 &a) = default; + __forceinline explicit vint8(const __m256i &a) : m256(a) {} - __forceinline operator const __m256i &() const; - __forceinline operator __m256i &(); + __forceinline operator const __m256i &() const + { + return m256; + } + __forceinline operator __m256i &() + { + return m256; + } - __forceinline vint8 &operator=(const vint8 &a); + __forceinline vint8 &operator=(const vint8 &a) + { + m256 = a.m256; + return *this; + } #else /* __KERNEL_AVX__ */ int a, b, c, d, e, f, g, h; #endif /* __KERNEL_AVX__ */ #ifndef __KERNEL_GPU__ - __forceinline int operator[](int i) const; - __forceinline int &operator[](int i); + __forceinline int operator[](int i) const + { + util_assert(i >= 0); + util_assert(i < 8); + return *(&a + i); + } + __forceinline int &operator[](int i) + { + util_assert(i >= 0); + util_assert(i < 8); + return *(&a + i); + } #endif }; -ccl_device_inline vint8 make_vint8(int a, int b, int c, int d, int e, int f, int g, int h); -ccl_device_inline vint8 make_vint8(int i); -ccl_device_inline vint8 make_vint8(const vfloat8 f); -ccl_device_inline vint8 make_vint8(const int4 a, const int4 b); +ccl_device_inline vint8 make_vint8(int a, int b, int c, int d, int e, int f, int g, int h) +{ +#ifdef __KERNEL_AVX__ + return vint8(_mm256_set_epi32(h, g, f, e, d, c, b, a)); +#else + return {a, b, c, d, e, f, g, h}; +#endif +} + +ccl_device_inline vint8 make_vint8(int i) +{ +#ifdef __KERNEL_AVX__ + return vint8(_mm256_set1_epi32(i)); +#else + return make_vint8(i, i, i, i, i, i, i, i); +#endif +} + +ccl_device_inline vint8 make_vint8(const int4 a, const int4 b) +{ +#ifdef __KERNEL_AVX__ + return vint8(_mm256_insertf128_si256(_mm256_castsi128_si256(a.m128), b.m128, 1)); +#else + return make_vint8(a.x, a.y, a.z, a.w, b.x, b.y, b.z, b.w); +#endif +} CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_int8_impl.h b/intern/cycles/util/types_int8_impl.h deleted file mode 100644 index 307358ed69d..00000000000 --- a/intern/cycles/util/types_int8_impl.h +++ /dev/null @@ -1,90 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifdef __KERNEL_AVX__ -__forceinline vint8::vint8() {} - -__forceinline vint8::vint8(const vint8 &a) : m256(a.m256) {} - -__forceinline vint8::vint8(const __m256i &a) : m256(a) {} - -__forceinline vint8::operator const __m256i &() const -{ - return m256; -} - -__forceinline vint8::operator __m256i &() -{ - return m256; -} - -__forceinline vint8 &vint8::operator=(const vint8 &a) -{ - m256 = a.m256; - return *this; -} -#endif /* __KERNEL_AVX__ */ - -#ifndef __KERNEL_GPU__ -__forceinline int vint8::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 8); - return *(&a + i); -} - -__forceinline int &vint8::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 8); - return *(&a + i); -} -#endif - -ccl_device_inline vint8 make_vint8(int a, int b, int c, int d, int e, int f, int g, int h) -{ -#ifdef __KERNEL_AVX__ - return vint8(_mm256_set_epi32(h, g, f, e, d, c, b, a)); -#else - return {a, b, c, d, e, f, g, h}; -#endif -} - -ccl_device_inline vint8 make_vint8(int i) -{ -#ifdef __KERNEL_AVX__ - return vint8(_mm256_set1_epi32(i)); -#else - return make_vint8(i, i, i, i, i, i, i, i); -#endif -} - -ccl_device_inline vint8 make_vint8(const vfloat8 f) -{ -#ifdef __KERNEL_AVX__ - return vint8(_mm256_cvtps_epi32(f.m256)); -#else - return make_vint8( - (int)f.a, (int)f.b, (int)f.c, (int)f.d, (int)f.e, (int)f.f, (int)f.g, (int)f.h); -#endif -} - -ccl_device_inline vint8 make_vint8(const int4 a, const int4 b) -{ -#ifdef __KERNEL_AVX__ - return vint8(_mm256_insertf128_si256(_mm256_castsi128_si256(a.m128), b.m128, 1)); -#else - return make_vint8(a.x, a.y, a.z, a.w, b.x, b.y, b.z, b.w); -#endif -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_spectrum.h b/intern/cycles/util/types_spectrum.h index fa038d38755..f886760e7ec 100644 --- a/intern/cycles/util/types_spectrum.h +++ b/intern/cycles/util/types_spectrum.h @@ -4,25 +4,21 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_float3.h" CCL_NAMESPACE_BEGIN #define SPECTRUM_CHANNELS 3 -#define SPECTRUM_DATA_TYPE float3 -#define PACKED_SPECTRUM_DATA_TYPE packed_float3 -using Spectrum = SPECTRUM_DATA_TYPE; -using PackedSpectrum = PACKED_SPECTRUM_DATA_TYPE; +using Spectrum = float3; +using PackedSpectrum = packed_float3; -#define make_spectrum(f) CONCAT(make_, SPECTRUM_DATA_TYPE(f)) -#define load_spectrum(f) CONCAT(load_, SPECTRUM_DATA_TYPE(f)) -#define store_spectrum(s, f) CONCAT(store_, SPECTRUM_DATA_TYPE((s), (f))) +#define make_spectrum(f) make_float3(f) +#define load_spectrum(f) load_float3(f) +#define store_spectrum(s, f) store_float3(f) -#define zero_spectrum CONCAT(zero_, SPECTRUM_DATA_TYPE) -#define one_spectrum CONCAT(one_, SPECTRUM_DATA_TYPE) +#define zero_spectrum zero_float3 +#define one_spectrum one_float3 #define FOREACH_SPECTRUM_CHANNEL(counter) \ for (int counter = 0; counter < SPECTRUM_CHANNELS; counter++) diff --git a/intern/cycles/util/types_uchar2.h b/intern/cycles/util/types_uchar2.h index 318273d2dec..324dde59ad7 100644 --- a/intern/cycles/util/types_uchar2.h +++ b/intern/cycles/util/types_uchar2.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" CCL_NAMESPACE_BEGIN @@ -15,12 +13,27 @@ struct uchar2 { uchar x, y; # ifndef __KERNEL_GPU__ - __forceinline uchar operator[](int i) const; - __forceinline uchar &operator[](int i); + __forceinline uchar operator[](int i) const + { + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); + } + + __forceinline uchar &operator[](int i) + { + util_assert(i >= 0); + util_assert(i < 2); + return *(&x + i); + } # endif }; -ccl_device_inline uchar2 make_uchar2(uchar x, uchar y); +ccl_device_inline uchar2 make_uchar2(uchar x, uchar y) +{ + uchar2 a = {x, y}; + return a; +} #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uchar2_impl.h b/intern/cycles/util/types_uchar2_impl.h deleted file mode 100644 index fd682906dbd..00000000000 --- a/intern/cycles/util/types_uchar2_impl.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifndef __KERNEL_GPU__ -uchar uchar2::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} - -uchar &uchar2::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 2); - return *(&x + i); -} -# endif - -ccl_device_inline uchar2 make_uchar2(uchar x, uchar y) -{ - uchar2 a = {x, y}; - return a; -} -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uchar3.h b/intern/cycles/util/types_uchar3.h index 91c1f6a76f8..183628b134f 100644 --- a/intern/cycles/util/types_uchar3.h +++ b/intern/cycles/util/types_uchar3.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" CCL_NAMESPACE_BEGIN @@ -15,12 +13,27 @@ struct uchar3 { uchar x, y, z; # ifndef __KERNEL_GPU__ - __forceinline uchar operator[](int i) const; - __forceinline uchar &operator[](int i); + __forceinline uchar operator[](int i) const + { + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); + } + + __forceinline uchar &operator[](int i) + { + util_assert(i >= 0); + util_assert(i < 3); + return *(&x + i); + } # endif }; -ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z); +ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) +{ + uchar3 a = {x, y, z}; + return a; +} #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uchar3_impl.h b/intern/cycles/util/types_uchar3_impl.h deleted file mode 100644 index 38a5d47af35..00000000000 --- a/intern/cycles/util/types_uchar3_impl.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifndef __KERNEL_GPU__ -uchar uchar3::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} - -uchar &uchar3::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 3); - return *(&x + i); -} -# endif - -ccl_device_inline uchar3 make_uchar3(uchar x, uchar y, uchar z) -{ - uchar3 a = {x, y, z}; - return a; -} -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uchar4.h b/intern/cycles/util/types_uchar4.h index bf3cceb1442..7d2e7fc8042 100644 --- a/intern/cycles/util/types_uchar4.h +++ b/intern/cycles/util/types_uchar4.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" CCL_NAMESPACE_BEGIN @@ -15,12 +13,27 @@ struct uchar4 { uchar x, y, z, w; # ifndef __KERNEL_GPU__ - __forceinline uchar operator[](int i) const; - __forceinline uchar &operator[](int i); + __forceinline uchar operator[](int i) const + { + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); + } + + __forceinline uchar &operator[](int i) + { + util_assert(i >= 0); + util_assert(i < 4); + return *(&x + i); + } # endif }; -ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w); +ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) +{ + uchar4 a = {x, y, z, w}; + return a; +} #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uchar4_impl.h b/intern/cycles/util/types_uchar4_impl.h deleted file mode 100644 index d83f32c198c..00000000000 --- a/intern/cycles/util/types_uchar4_impl.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifndef __KERNEL_GPU__ -uchar uchar4::operator[](int i) const -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} - -uchar &uchar4::operator[](int i) -{ - util_assert(i >= 0); - util_assert(i < 4); - return *(&x + i); -} -# endif - -ccl_device_inline uchar4 make_uchar4(uchar x, uchar y, uchar z, uchar w) -{ - uchar4 a = {x, y, z, w}; - return a; -} -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uint2.h b/intern/cycles/util/types_uint2.h index 79b3c11dea9..b8edfc2eefe 100644 --- a/intern/cycles/util/types_uint2.h +++ b/intern/cycles/util/types_uint2.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" CCL_NAMESPACE_BEGIN @@ -15,12 +13,25 @@ struct uint2 { uint x, y; # ifndef __KERNEL_GPU__ - __forceinline uint operator[](uint i) const; - __forceinline uint &operator[](uint i); + __forceinline uint operator[](uint i) const + { + util_assert(i < 2); + return *(&x + i); + } + + __forceinline uint &operator[](uint i) + { + util_assert(i < 2); + return *(&x + i); + } # endif }; -ccl_device_inline uint2 make_uint2(uint x, uint y); +ccl_device_inline uint2 make_uint2(uint x, uint y) +{ + uint2 a = {x, y}; + return a; +} #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uint2_impl.h b/intern/cycles/util/types_uint2_impl.h deleted file mode 100644 index dc7aa498d9e..00000000000 --- a/intern/cycles/util/types_uint2_impl.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifndef __KERNEL_GPU__ -__forceinline uint uint2::operator[](uint i) const -{ - util_assert(i < 2); - return *(&x + i); -} - -__forceinline uint &uint2::operator[](uint i) -{ - util_assert(i < 2); - return *(&x + i); -} -# endif - -ccl_device_inline uint2 make_uint2(uint x, uint y) -{ - uint2 a = {x, y}; - return a; -} -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uint3.h b/intern/cycles/util/types_uint3.h index f3ee89e8929..905ff8e9a93 100644 --- a/intern/cycles/util/types_uint3.h +++ b/intern/cycles/util/types_uint3.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" CCL_NAMESPACE_BEGIN @@ -15,12 +13,25 @@ struct uint3 { uint x, y, z; # ifndef __KERNEL_GPU__ - __forceinline uint operator[](uint i) const; - __forceinline uint &operator[](uint i); + __forceinline uint operator[](uint i) const + { + util_assert(i < 3); + return *(&x + i); + } + + __forceinline uint &operator[](uint i) + { + util_assert(i < 3); + return *(&x + i); + } # endif }; -ccl_device_inline uint3 make_uint3(uint x, uint y, uint z); +ccl_device_inline uint3 make_uint3(uint x, uint y, uint z) +{ + uint3 a = {x, y, z}; + return a; +} #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ #if defined(__KERNEL_METAL__) @@ -33,7 +44,7 @@ typedef uint3 packed_uint3; struct packed_uint3 { uint x, y, z; - ccl_device_inline_method packed_uint3(){}; + ccl_device_inline_method packed_uint3() = default; ccl_device_inline_method packed_uint3(const uint px, const uint py, const uint pz) : x(px), y(py), z(pz){}; @@ -54,11 +65,27 @@ struct packed_uint3 { } # ifndef __KERNEL_GPU__ - __forceinline uint operator[](uint i) const; - __forceinline uint &operator[](uint i); + __forceinline uint operator[](uint i) const + { + util_assert(i < 3); + return *(&x + i); + } + + __forceinline uint &operator[](uint i) + { + util_assert(i < 3); + return *(&x + i); + } # endif }; static_assert(sizeof(packed_uint3) == 12, "packed_uint3 expected to be exactly 12 bytes"); #endif + +ccl_device_inline packed_uint3 make_packed_uint3(uint x, uint y, uint z) +{ + packed_uint3 a = {x, y, z}; + return a; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uint3_impl.h b/intern/cycles/util/types_uint3_impl.h deleted file mode 100644 index a86123435c5..00000000000 --- a/intern/cycles/util/types_uint3_impl.h +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifndef __KERNEL_GPU__ -__forceinline uint uint3::operator[](uint i) const -{ - util_assert(i < 3); - return *(&x + i); -} - -__forceinline uint &uint3::operator[](uint i) -{ - util_assert(i < 3); - return *(&x + i); -} - -__forceinline uint packed_uint3::operator[](uint i) const -{ - util_assert(i < 3); - return *(&x + i); -} - -__forceinline uint &packed_uint3::operator[](uint i) -{ - util_assert(i < 3); - return *(&x + i); -} -# endif - -ccl_device_inline uint3 make_uint3(uint x, uint y, uint z) -{ - uint3 a = {x, y, z}; - return a; -} - -ccl_device_inline packed_uint3 make_packed_uint3(uint x, uint y, uint z) -{ - packed_uint3 a = {x, y, z}; - return a; -} -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uint4.h b/intern/cycles/util/types_uint4.h index f8383205183..01e3bbe9487 100644 --- a/intern/cycles/util/types_uint4.h +++ b/intern/cycles/util/types_uint4.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/types_base.h" CCL_NAMESPACE_BEGIN @@ -15,12 +13,25 @@ struct uint4 { uint x, y, z, w; # ifndef __KERNEL_GPU__ - __forceinline uint operator[](uint i) const; - __forceinline uint &operator[](uint i); + __forceinline uint operator[](uint i) const + { + util_assert(i < 3); + return *(&x + i); + } + + __forceinline uint &operator[](uint i) + { + util_assert(i < 3); + return *(&x + i); + } # endif }; -ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w); +ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) +{ + uint4 a = {x, y, z, w}; + return a; +} #endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_uint4_impl.h b/intern/cycles/util/types_uint4_impl.h deleted file mode 100644 index 65361fc0156..00000000000 --- a/intern/cycles/util/types_uint4_impl.h +++ /dev/null @@ -1,35 +0,0 @@ -/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation - * - * SPDX-License-Identifier: Apache-2.0 */ - -#pragma once - -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif - -CCL_NAMESPACE_BEGIN - -#ifndef __KERNEL_NATIVE_VECTOR_TYPES__ -# ifndef __KERNEL_GPU__ -__forceinline uint uint4::operator[](uint i) const -{ - util_assert(i < 3); - return *(&x + i); -} - -__forceinline uint &uint4::operator[](uint i) -{ - util_assert(i < 3); - return *(&x + i); -} -# endif - -ccl_device_inline uint4 make_uint4(uint x, uint y, uint z, uint w) -{ - uint4 a = {x, y, z, w}; - return a; -} -#endif /* __KERNEL_NATIVE_VECTOR_TYPES__ */ - -CCL_NAMESPACE_END diff --git a/intern/cycles/util/types_ushort4.h b/intern/cycles/util/types_ushort4.h index c2fa5e76efb..65c8e315766 100644 --- a/intern/cycles/util/types_ushort4.h +++ b/intern/cycles/util/types_ushort4.h @@ -4,9 +4,7 @@ #pragma once -#ifndef __UTIL_TYPES_H__ -# error "Do not include this file directly, include util/types.h instead." -#endif +#include "util/defines.h" CCL_NAMESPACE_BEGIN