2022-02-11 13:53:21 +01:00
|
|
|
/* SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
* Copyright 2011-2022 Blender Foundation */
|
2011-04-27 11:58:34 +00:00
|
|
|
|
|
|
|
|
#ifndef __UTIL_MATH_H__
|
|
|
|
|
#define __UTIL_MATH_H__
|
|
|
|
|
|
|
|
|
|
/* Math
|
|
|
|
|
*
|
|
|
|
|
* Basic math functions on scalar and vector types. This header is used by
|
|
|
|
|
* both the kernel code when compiled as C++, and other C++ non-kernel code. */
|
|
|
|
|
|
2017-01-20 11:55:48 +01:00
|
|
|
#ifndef __KERNEL_GPU__
|
|
|
|
|
# include <cmath>
|
|
|
|
|
#endif
|
|
|
|
|
|
2021-09-28 16:51:14 +02:00
|
|
|
#ifdef __HIP__
|
|
|
|
|
# include <hip/hip_vector_types.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
2021-11-18 14:25:05 +01:00
|
|
|
#if !defined(__KERNEL_METAL__)
|
|
|
|
|
# include <float.h>
|
|
|
|
|
# include <math.h>
|
|
|
|
|
# include <stdio.h>
|
|
|
|
|
#endif /* !defined(__KERNEL_METAL__) */
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2021-10-24 14:19:19 +02:00
|
|
|
#include "util/types.h"
|
2011-04-27 11:58:34 +00:00
|
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
|
2012-04-11 09:07:28 +00:00
|
|
|
/* Float Pi variations */
|
|
|
|
|
|
2013-05-12 14:13:29 +00:00
|
|
|
/* Division */
|
2011-09-02 00:10:03 +00:00
|
|
|
#ifndef M_PI_F
|
2017-04-14 14:05:23 +02:00
|
|
|
# define M_PI_F (3.1415926535897932f) /* pi */
|
2011-09-02 00:10:03 +00:00
|
|
|
#endif
|
|
|
|
|
#ifndef M_PI_2_F
|
2017-04-14 14:05:23 +02:00
|
|
|
# define M_PI_2_F (1.5707963267948966f) /* pi/2 */
|
2011-09-02 00:10:03 +00:00
|
|
|
#endif
|
|
|
|
|
#ifndef M_PI_4_F
|
2017-04-14 14:05:23 +02:00
|
|
|
# define M_PI_4_F (0.7853981633974830f) /* pi/4 */
|
2011-09-02 00:10:03 +00:00
|
|
|
#endif
|
|
|
|
|
#ifndef M_1_PI_F
|
2017-04-14 14:05:23 +02:00
|
|
|
# define M_1_PI_F (0.3183098861837067f) /* 1/pi */
|
2011-09-02 00:10:03 +00:00
|
|
|
#endif
|
|
|
|
|
#ifndef M_2_PI_F
|
2017-04-14 14:05:23 +02:00
|
|
|
# define M_2_PI_F (0.6366197723675813f) /* 2/pi */
|
2011-08-09 18:53:54 +00:00
|
|
|
#endif
|
2018-07-18 11:14:43 +02:00
|
|
|
#ifndef M_1_2PI_F
|
|
|
|
|
# define M_1_2PI_F (0.1591549430918953f) /* 1/(2*pi) */
|
|
|
|
|
#endif
|
|
|
|
|
#ifndef M_SQRT_PI_8_F
|
|
|
|
|
# define M_SQRT_PI_8_F (0.6266570686577501f) /* sqrt(pi/8) */
|
|
|
|
|
#endif
|
|
|
|
|
#ifndef M_LN_2PI_F
|
|
|
|
|
# define M_LN_2PI_F (1.8378770664093454f) /* ln(2*pi) */
|
|
|
|
|
#endif
|
2013-05-12 14:13:29 +00:00
|
|
|
|
|
|
|
|
/* Multiplication */
|
|
|
|
|
#ifndef M_2PI_F
|
2017-04-14 14:05:23 +02:00
|
|
|
# define M_2PI_F (6.2831853071795864f) /* 2*pi */
|
2013-05-12 14:13:29 +00:00
|
|
|
#endif
|
|
|
|
|
#ifndef M_4PI_F
|
2017-04-14 14:05:23 +02:00
|
|
|
# define M_4PI_F (12.566370614359172f) /* 4*pi */
|
2013-05-12 14:13:29 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/* Float sqrt variations */
|
2012-04-30 12:49:26 +00:00
|
|
|
#ifndef M_SQRT2_F
|
2017-04-14 14:05:23 +02:00
|
|
|
# define M_SQRT2_F (1.4142135623730950f) /* sqrt(2) */
|
2012-04-30 12:49:26 +00:00
|
|
|
#endif
|
2022-02-25 20:03:25 +01:00
|
|
|
#ifndef M_SQRT3_F
|
|
|
|
|
# define M_SQRT3_F (1.7320508075688772f) /* sqrt(3) */
|
|
|
|
|
#endif
|
2015-02-06 15:40:07 +05:00
|
|
|
#ifndef M_LN2_F
|
2017-04-14 14:05:23 +02:00
|
|
|
# define M_LN2_F (0.6931471805599453f) /* ln(2) */
|
2015-02-06 15:40:07 +05:00
|
|
|
#endif
|
|
|
|
|
#ifndef M_LN10_F
|
2017-04-14 14:05:23 +02:00
|
|
|
# define M_LN10_F (2.3025850929940457f) /* ln(10) */
|
2015-02-06 15:40:07 +05:00
|
|
|
#endif
|
2011-08-09 18:53:54 +00:00
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
/* Scalar */
|
|
|
|
|
|
2021-09-28 16:51:14 +02:00
|
|
|
#ifndef __HIP__
|
|
|
|
|
# ifdef _WIN32
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Fallback `fmaxf()`: yields `a` when it compares greater than `b`, otherwise `b`. */
ccl_device_inline float fmaxf(float a, float b)
{
  if (a > b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Fallback `fminf()`: yields `a` when it compares less than `b`, otherwise `b`. */
ccl_device_inline float fminf(float a, float b)
{
  if (a < b) {
    return a;
  }
  return b;
}
|
2021-09-28 16:51:14 +02:00
|
|
|
|
|
|
|
|
# endif /* _WIN32 */
|
|
|
|
|
#endif /* __HIP__ */
|
2012-04-11 09:07:28 +00:00
|
|
|
|
2011-04-27 17:23:37 +00:00
|
|
|
#ifndef __KERNEL_GPU__
|
2017-01-20 11:55:48 +01:00
|
|
|
using std::isfinite;
|
|
|
|
|
using std::isnan;
|
2017-08-02 02:23:03 +02:00
|
|
|
using std::sqrt;
|
2017-01-20 11:55:48 +01:00
|
|
|
|
2016-04-15 15:29:12 +02:00
|
|
|
/* Absolute value of an integer: positive inputs pass through, everything else is negated. */
ccl_device_inline int abs(int x)
{
  if (x > 0) {
    return x;
  }
  return -x;
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Larger of two `int` values (`b` is returned when they are equal). */
ccl_device_inline int max(int a, int b)
{
  if (a > b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Smaller of two `int` values (`b` is returned when they are equal). */
ccl_device_inline int min(int a, int b)
{
  if (a < b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
2022-02-10 10:37:00 +01:00
|
|
|
/* Larger of two `uint32_t` values. */
ccl_device_inline uint32_t max(uint32_t a, uint32_t b)
{
  if (a > b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
|
|
|
|
/* Smaller of two `uint32_t` values. */
ccl_device_inline uint32_t min(uint32_t a, uint32_t b)
{
  if (a < b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
|
|
|
|
/* Larger of two `uint64_t` values. */
ccl_device_inline uint64_t max(uint64_t a, uint64_t b)
{
  if (a > b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
|
|
|
|
/* Smaller of two `uint64_t` values. */
ccl_device_inline uint64_t min(uint64_t a, uint64_t b)
{
  if (a < b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
|
|
|
|
/* NOTE: On 64-bit Darwin `size_t` is defined as `unsigned long int` while `uint64_t` is defined
 * as `unsigned long long`. Both are 64-bit unsigned integers, but because they are not exactly
 * the same type, overload resolution cannot unambiguously pick the functions defined for
 * `uint64_t` when called with `size_t` arguments.
 * Work around this by adding a templated function enabled only for the `size_t` type, which is
 * used when there is no exactly matching overload of `min()`/`max()` above. */
|
|
|
|
|
|
|
|
|
|
template<class T>
|
|
|
|
|
ccl_device_inline typename std::enable_if_t<std::is_same_v<T, size_t>, T> max(T a, T b)
|
|
|
|
|
{
|
|
|
|
|
return (a > b) ? a : b;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
template<class T>
|
|
|
|
|
ccl_device_inline typename std::enable_if_t<std::is_same_v<T, size_t>, T> min(T a, T b)
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
{
|
|
|
|
|
return (a < b) ? a : b;
|
|
|
|
|
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Larger of two floats. `b` is returned whenever `a > b` is false, which includes the case
 * where either operand is NaN. */
ccl_device_inline float max(float a, float b)
{
  if (a > b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Smaller of two floats. `b` is returned whenever `a < b` is false, which includes the case
 * where either operand is NaN. */
ccl_device_inline float min(float a, float b)
{
  if (a < b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Larger of two doubles; `b` is returned whenever `a > b` is false. */
ccl_device_inline double max(double a, double b)
{
  if (a > b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Smaller of two doubles; `b` is returned whenever `a < b` is false. */
ccl_device_inline double min(double a, double b)
{
  if (a < b) {
    return a;
  }
  return b;
}
|
|
|
|
|
|
2014-12-16 20:27:44 +05:00
|
|
|
/* These two functions are templated for use with register data types.
 *
 * NOTE: Since these are CPU-only functions it is OK to use references here.
 * But for other devices we'll need to be careful about this.
 */
|
|
|
|
|
|
|
|
|
|
/* Minimum of four values, reduced pairwise through the `min()` overload available for `T`. */
template<typename T> ccl_device_inline T min4(const T &a, const T &b, const T &c, const T &d)
{
  const T lowest_ab = min(a, b);
  const T lowest_cd = min(c, d);
  return min(lowest_ab, lowest_cd);
}
|
|
|
|
|
|
|
|
|
|
/* Maximum of four values, reduced pairwise through the `max()` overload available for `T`. */
template<typename T> ccl_device_inline T max4(const T &a, const T &b, const T &c, const T &d)
{
  const T highest_ab = max(a, b);
  const T highest_cd = max(c, d);
  return max(highest_ab, highest_cd);
}
|
2017-04-14 14:05:23 +02:00
|
|
|
#endif /* __KERNEL_GPU__ */
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Minimum of four floats, reduced pairwise with `min()`. */
ccl_device_inline float min4(float a, float b, float c, float d)
{
  const float lowest_ab = min(a, b);
  const float lowest_cd = min(c, d);
  return min(lowest_ab, lowest_cd);
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Maximum of four floats, reduced pairwise with `max()`. */
ccl_device_inline float max4(float a, float b, float c, float d)
{
  const float highest_ab = max(a, b);
  const float highest_cd = max(c, d);
  return max(highest_ab, highest_cd);
}
|
|
|
|
|
|
2021-11-18 14:25:05 +01:00
|
|
|
#if !defined(__KERNEL_METAL__)
|
2017-05-07 14:40:58 +02:00
|
|
|
/* Int/Float conversion */
|
|
|
|
|
|
|
|
|
|
/* Reinterpret the bits of an unsigned integer as a signed integer (no value conversion). */
ccl_device_inline int as_int(uint i)
{
  union {
    uint as_unsigned;
    int as_signed;
  } bits;
  bits.as_unsigned = i;
  return bits.as_signed;
}
|
|
|
|
|
|
|
|
|
|
/* Reinterpret the bits of a signed integer as an unsigned integer (no value conversion). */
ccl_device_inline uint as_uint(int i)
{
  union {
    uint as_unsigned;
    int as_signed;
  } bits;
  bits.as_signed = i;
  return bits.as_unsigned;
}
|
|
|
|
|
|
|
|
|
|
/* Reinterpret the bits of a float as an unsigned integer (no value conversion). */
ccl_device_inline uint as_uint(float f)
{
  union {
    uint as_unsigned;
    float as_float;
  } bits;
  bits.as_float = f;
  return bits.as_unsigned;
}
|
|
|
|
|
|
2021-11-18 14:25:05 +01:00
|
|
|
# ifndef __HIP__
|
2017-05-07 14:40:58 +02:00
|
|
|
/* Reinterpret the bits of a float as a signed integer (no value conversion). */
ccl_device_inline int __float_as_int(float f)
{
  union {
    int as_signed;
    float as_float;
  } bits;
  bits.as_float = f;
  return bits.as_signed;
}
|
|
|
|
|
|
|
|
|
|
/* Reinterpret the bits of a signed integer as a float (no value conversion). */
ccl_device_inline float __int_as_float(int i)
{
  union {
    int as_signed;
    float as_float;
  } bits;
  bits.as_signed = i;
  return bits.as_float;
}
|
|
|
|
|
|
|
|
|
|
/* Reinterpret the bits of a float as an unsigned integer (no value conversion). */
ccl_device_inline uint __float_as_uint(float f)
{
  union {
    uint as_unsigned;
    float as_float;
  } bits;
  bits.as_float = f;
  return bits.as_unsigned;
}
|
|
|
|
|
|
|
|
|
|
/* Reinterpret the bits of an unsigned integer as a float (no value conversion). */
ccl_device_inline float __uint_as_float(uint i)
{
  union {
    uint as_unsigned;
    float as_float;
  } bits;
  bits.as_unsigned = i;
  return bits.as_float;
}
|
2021-11-18 14:25:05 +01:00
|
|
|
# endif
|
2018-10-06 20:39:01 +02:00
|
|
|
|
|
|
|
|
/* Reinterpret the bits of each `float4` component as an integer. Uses a single SSE register
 * cast when `__KERNEL_SSE__` is defined, otherwise converts component by component. */
ccl_device_inline int4 __float4_as_int4(float4 f)
{
#  ifdef __KERNEL_SSE__
  return int4(_mm_castps_si128(f.m128));
#  else
  const int x = __float_as_int(f.x);
  const int y = __float_as_int(f.y);
  const int z = __float_as_int(f.z);
  const int w = __float_as_int(f.w);
  return make_int4(x, y, z, w);
#  endif
}
|
|
|
|
|
|
|
|
|
|
/* Reinterpret the bits of each `int4` component as a float. Uses a single SSE register cast
 * when `__KERNEL_SSE__` is defined, otherwise converts component by component. */
ccl_device_inline float4 __int4_as_float4(int4 i)
{
#  ifdef __KERNEL_SSE__
  return float4(_mm_castsi128_ps(i.m128));
#  else
  const float x = __int_as_float(i.x);
  const float y = __int_as_float(i.y);
  const float z = __int_as_float(i.z);
  const float w = __int_as_float(i.w);
  return make_float4(x, y, z, w);
#  endif
}
|
2021-11-18 14:25:05 +01:00
|
|
|
#endif /* !defined(__KERNEL_METAL__) */
|
2017-05-07 14:40:58 +02:00
|
|
|
|
2021-11-18 14:25:05 +01:00
|
|
|
#if defined(__KERNEL_METAL__)
|
2022-06-23 14:29:17 +02:00
|
|
|
/* Metal variant: forwards directly to `isnan()`. */
ccl_device_forceinline bool isnan_safe(float f)
{
  const bool is_nan = isnan(f);
  return is_nan;
}
|
|
|
|
|
|
|
|
|
|
/* Metal variant: forwards directly to `isfinite()`. */
ccl_device_forceinline bool isfinite_safe(float f)
{
  const bool is_finite = isfinite(f);
  return is_finite;
}
|
2021-11-18 14:25:05 +01:00
|
|
|
#else
|
2021-10-14 17:51:27 +02:00
|
|
|
/* Low 32 bits of a pointer's address; pairs with `pointer_pack_to_uint_1()`. */
template<typename T> ccl_device_inline uint pointer_pack_to_uint_0(T *ptr)
{
  const uint64_t address = (uint64_t)ptr;
  return address & 0xFFFFFFFF;
}
|
|
|
|
|
|
|
|
|
|
/* High 32 bits of a pointer's address; pairs with `pointer_pack_to_uint_0()`. */
template<typename T> ccl_device_inline uint pointer_pack_to_uint_1(T *ptr)
{
  const uint64_t address = (uint64_t)ptr;
  return (address >> 32) & 0xFFFFFFFF;
}
|
|
|
|
|
|
|
|
|
|
/* Rebuild a pointer from two 32-bit halves: `a` holds the low bits, `b` the high bits. */
template<typename T> ccl_device_inline T *pointer_unpack_from_uint(const uint a, const uint b)
{
  const uint64_t high_bits = (uint64_t)b << 32;
  const uint64_t address = high_bits | a;
  return (T *)address;
}
|
|
|
|
|
|
|
|
|
|
/* Pack two values into one `uint`: `a` goes into the upper 16 bits and `b` is OR-ed into the
 * result unmasked, so `b` is expected to fit in 16 bits. */
ccl_device_inline uint uint16_pack_to_uint(const uint a, const uint b)
{
  const uint upper = a << 16;
  return upper | b;
}
|
|
|
|
|
|
|
|
|
|
/* Extract the upper 16 bits of `i`. */
ccl_device_inline uint uint16_unpack_from_uint_0(const uint i)
{
  const uint upper = i >> 16;
  return upper;
}
|
|
|
|
|
|
|
|
|
|
/* Extract the lower 16 bits of `i`. */
ccl_device_inline uint uint16_unpack_from_uint_1(const uint i)
{
  const uint lower = i & 0xFFFF;
  return lower;
}
|
|
|
|
|
|
2017-05-07 14:40:58 +02:00
|
|
|
/* Versions of functions which are safe for fast math. */
|
|
|
|
|
/* NaN test done purely on the bit pattern of `f`, without any float comparison. */
ccl_device_inline bool isnan_safe(float f)
{
  /* Shifting left by one discards the sign bit. What remains exceeds 0xff000000 only when
   * all exponent bits are set and the mantissa is non-zero. */
  const unsigned int bits_without_sign = __float_as_uint(f) << 1;
  return bits_without_sign > 0xff000000u;
}
|
|
|
|
|
|
|
|
|
|
/* Finite test done purely on the bit pattern of `f`.
 *
 * Returns true unless `f` is +/-Inf or NaN. No float comparison or arithmetic is involved, so
 * the result cannot be folded away by compilers that assume finite math, and it does not
 * depend on how denormals are treated by the floating point unit. */
ccl_device_inline bool isfinite_safe(float f)
{
  /* In IEEE 754 single precision, Inf and NaN are exactly the values whose 8 exponent bits
   * are all set; every other bit pattern (zeros, denormals, normals) is finite. */
  const unsigned int x = __float_as_uint(f);
  return (x & 0x7f800000u) != 0x7f800000u;
}
|
2021-11-18 14:25:05 +01:00
|
|
|
#endif
|
2017-05-07 14:40:58 +02:00
|
|
|
|
|
|
|
|
/* Pass `v` through when `isfinite_safe()` accepts it, otherwise substitute zero. */
ccl_device_inline float ensure_finite(float v)
{
  if (isfinite_safe(v)) {
    return v;
  }
  return 0.0f;
}
|
|
|
|
|
|
2021-11-18 14:25:05 +01:00
|
|
|
#if !defined(__KERNEL_METAL__)
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Clamp `a` to `[mn, mx]`: the lower bound is applied first, then the upper bound. */
ccl_device_inline int clamp(int a, int mn, int mx)
{
  const int lower_bounded = max(a, mn);
  return min(lower_bounded, mx);
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Clamp `a` to `[mn, mx]`: the lower bound is applied first, then the upper bound. */
ccl_device_inline float clamp(float a, float mn, float mx)
{
  const float lower_bounded = max(a, mn);
  return min(lower_bounded, mx);
}
|
|
|
|
|
|
2016-07-16 19:42:28 -04:00
|
|
|
/* Linear blend from `a` (at t = 0) to `b` (at t = 1); `t` is not clamped. */
ccl_device_inline float mix(float a, float b, float t)
{
  const float delta = b - a;
  return a + t * delta;
}
|
2019-09-12 13:09:31 +02:00
|
|
|
|
|
|
|
|
/* Hermite smooth step: 0 below `edge0`, 1 at or above `edge1`, and the cubic
 * `t * t * (3 - 2 * t)` of the normalized position in between. */
ccl_device_inline float smoothstep(float edge0, float edge1, float x)
{
  if (x < edge0) {
    return 0.0f;
  }
  if (x >= edge1) {
    return 1.0f;
  }
  const float t = (x - edge0) / (edge1 - edge0);
  return (3.0f - 2.0f * t) * (t * t);
}
|
|
|
|
|
|
2021-11-18 14:25:05 +01:00
|
|
|
#endif /* !defined(__KERNEL_METAL__) */
|
|
|
|
|
|
|
|
|
|
#if defined(__KERNEL_CUDA__)
|
2021-10-27 13:28:13 +02:00
|
|
|
/* CUDA variant: forwards to the `__saturatef()` intrinsic. */
ccl_device_inline float saturatef(float a)
{
  const float saturated = __saturatef(a);
  return saturated;
}
|
2021-11-18 14:25:05 +01:00
|
|
|
#elif !defined(__KERNEL_METAL__)
|
2021-10-27 13:28:13 +02:00
|
|
|
/* Generic variant: clamp `a` to the `[0, 1]` range. */
ccl_device_inline float saturatef(float a)
{
  const float saturated = clamp(a, 0.0f, 1.0f);
  return saturated;
}
|
2017-04-14 14:05:23 +02:00
|
|
|
#endif /* __KERNEL_CUDA__ */
|
2015-04-28 00:13:03 +05:00
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Convert a float to int with a plain cast, i.e. truncating towards zero. */
ccl_device_inline int float_to_int(float f)
{
  const int truncated = (int)f;
  return truncated;
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Round `f` down with `floorf()` and convert the result to int. */
ccl_device_inline int floor_to_int(float f)
{
  const float floored = floorf(f);
  return float_to_int(floored);
}
|
|
|
|
|
|
2018-07-14 15:38:58 +02:00
|
|
|
/* Cheap floor: truncate towards zero and subtract one for any negative input.
 * Exact negative integers are decremented as well (e.g. -2.0f yields -3). */
ccl_device_inline int quick_floor_to_int(float x)
{
  const int truncated = float_to_int(x);
  return (x < 0) ? truncated - 1 : truncated;
}
|
|
|
|
|
|
Cycles: Kernel address space changes for MSL
This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation.
MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness.
The vast majority of deltas in this patch fall into one of two cases:
- Ensuring ccl_private is specified for thread-local pointer types
- Ensuring ccl_global is specified for device-wide pointer types
Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant.
In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture.
The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation.
Ref T92212
Reviewed By: brecht
Maniphest Tasks: T92212
Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
|
|
|
/* Split `x` into an integer part, written to `*i` via `quick_floor_to_int()`, and the
 * remainder `x - *i`, which is returned. */
ccl_device_inline float floorfrac(float x, ccl_private int *i)
{
  const int whole = quick_floor_to_int(x);
  *i = whole;
  return x - whole;
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Round `f` up with `ceilf()` and convert the result to int. */
ccl_device_inline int ceil_to_int(float f)
{
  const float ceiled = ceilf(f);
  return float_to_int(ceiled);
}
|
|
|
|
|
|
Maths Node: Additional functions
When creating shaders and using maths functions it is expected that Blender should match functions in other DCC applications, game engines and shading languages such as GLSL and OSL.
This patch adds missing functions to the Blender maths node.
Ideally, it would be nice to have these functions available to vectors too but that is not part of this patch.
This patch adds the following functions trunc, snap, wrap, compare, pingpong, sign, radians, degrees, cosh, sinh, tanh, exp, smoothmin and inversesqrt.
Sign function is based on GLSL and OSL functions and returns zero when x == 0.
Differential Revision: https://developer.blender.org/D5957
2019-12-05 23:02:05 +00:00
|
|
|
/* Fractional part of `x`, measured from `floorf(x)`. */
ccl_device_inline float fractf(float x)
{
  const float whole = floorf(x);
  return x - whole;
}
|
|
|
|
|
|
2022-01-26 16:06:22 +11:00
|
|
|
/* Adapted from `godot-engine` math_funcs.h. */
|
Maths Node: Additional functions
When creating shaders and using maths functions it is expected that Blender should match functions in other DCC applications, game engines and shading languages such as GLSL and OSL.
This patch adds missing functions to the Blender maths node.
Ideally, it would be nice to have these functions available to vectors too but that is not part of this patch.
This patch adds the following functions trunc, snap, wrap, compare, pingpong, sign, radians, degrees, cosh, sinh, tanh, exp, smoothmin and inversesqrt.
Sign function is based on GLSL and OSL functions and returns zero when x == 0.
Differential Revision: https://developer.blender.org/D5957
2019-12-05 23:02:05 +00:00
|
|
|
ccl_device_inline float wrapf(float value, float max, float min)
{
  /* Wrap `value` into the interval starting at `min` with length `max - min`.
   * An empty interval collapses to `min`. */
  const float range = max - min;
  if (range == 0.0f) {
    return min;
  }
  return value - (range * floorf((value - min) / range));
}
|
|
|
|
|
|
|
|
|
|
/* Triangle wave of `a` bouncing between 0 and `b` with period `2 * b`; a zero `b` yields 0. */
ccl_device_inline float pingpongf(float a, float b)
{
  if (b == 0.0f) {
    return 0.0f;
  }
  return fabsf(fractf((a - b) / (b * 2.0f)) * b * 2.0f - b);
}
|
|
|
|
|
|
|
|
|
|
/* Smooth minimum of `a` and `b` with blend distance `k`; a zero `k` falls back to the plain
 * minimum. */
ccl_device_inline float smoothminf(float a, float b, float k)
{
  if (k == 0.0f) {
    return fminf(a, b);
  }
  /* `h` is zero once the inputs are at least `k` apart, leaving the plain minimum. */
  const float h = fmaxf(k - fabsf(a - b), 0.0f) / k;
  return fminf(a, b) - h * h * h * k * (1.0f / 6.0f);
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Sign of `f` as -1 or +1. Zero (and anything else not less than zero) maps to +1. */
ccl_device_inline float signf(float f)
{
  if (f < 0.0f) {
    return -1.0f;
  }
  return 1.0f;
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Push values with magnitude below `eps` out to `+/-eps`, keeping the sign reported by
 * `signf()`; other values pass through unchanged. */
ccl_device_inline float nonzerof(float f, float eps)
{
  return (fabsf(f) < eps) ? signf(f) * eps : f;
}
|
|
|
|
|
|
2021-02-05 16:23:34 +11:00
|
|
|
/* `signum` function testing for zero. Matches GLSL and OSL functions. */
|
Maths Node: Additional functions
When creating shaders and using maths functions it is expected that Blender should match functions in other DCC applications, game engines and shading languages such as GLSL and OSL.
This patch adds missing functions to the Blender maths node.
Ideally, it would be nice to have these functions available to vectors too but that is not part of this patch.
This patch adds the following functions trunc, snap, wrap, compare, pingpong, sign, radians, degrees, cosh, sinh, tanh, exp, smoothmin and inversesqrt.
Sign function is based on GLSL and OSL functions and returns zero when x == 0.
Differential Revision: https://developer.blender.org/D5957
2019-12-05 23:02:05 +00:00
|
|
|
ccl_device_inline float compatible_signf(float f)
{
  /* Unlike `signf()`, an input of exactly zero yields zero. */
  return (f == 0.0f) ? 0.0f : signf(f);
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Cubic smooth step polynomial `3 * f^2 - 2 * f^3`; the input is not clamped. */
ccl_device_inline float smoothstepf(float f)
{
  const float f_squared = f * f;
  const float two_f_cubed = 2.0f * f_squared * f;
  return 3.0f * f_squared - two_f_cubed;
}
|
|
|
|
|
|
2016-07-16 19:42:28 -04:00
|
|
|
/* Modulo that stays non-negative for a positive modulus `m`, also when `x` is negative. */
ccl_device_inline int mod(int x, int m)
{
  const int remainder = x % m;
  return (remainder + m) % m;
}
|
|
|
|
|
|
2017-04-14 14:05:23 +02:00
|
|
|
/* Extend a `float2` to a `float3` with z set to zero. */
ccl_device_inline float3 float2_to_float3(const float2 a)
{
  const float3 extended = make_float3(a.x, a.y, 0.0f);
  return extended;
}
|
|
|
|
|
|
2017-04-14 14:05:23 +02:00
|
|
|
/* Drop the w component of a `float4`. */
ccl_device_inline float3 float4_to_float3(const float4 a)
{
  const float3 truncated = make_float3(a.x, a.y, a.z);
  return truncated;
}
|
|
|
|
|
|
2017-04-14 14:05:23 +02:00
|
|
|
/* Extend a `float3` to a `float4` with w set to one. */
ccl_device_inline float4 float3_to_float4(const float3 a)
{
  const float4 extended = make_float4(a.x, a.y, a.z, 1.0f);
  return extended;
}
|
|
|
|
|
|
Cycles: Add Support for IES files as textures for light strength
This patch adds support for IES files, a file format that is commonly used to store the directional intensity distribution of light sources.
The new IES node is supposed to be plugged into the Strength input of the Emission node of the lamp.
Since people generating IES files do not really seem to care about the standard, the parser is flexible enough to accept all test files I have tried.
Some common weirdnesses are distributing values over multiple lines that should go into one line, using commas instead of spaces as delimiters and adding various useless stuff at the end of the file.
The user interface of the node is similar to the script node, the user can either select an internal Text or load a file.
Internally, IES files are handled similar to Image textures: They are stored in slots by the LightManager and each unique IES is assigned to one slot.
The local coordinate system of the lamp is used, so that the direction of the light can be changed. For UI reasons, it's usually best to add an area light,
rotate it and then change its type, since especially the point light does not immediately show its local coordinate system in the viewport.
Reviewers: #cycles, dingto, sergey, brecht
Reviewed By: #cycles, dingto, brecht
Subscribers: OgDEV, crazyrobinhood, secundar, cardboard, pisuke, intrah, swerner, micah_denn, harvester, gottfried, disnel, campbellbarton, duarteframos, Lapineige, brecht, juicyfruit, dingto, marek, rickyblender, bliblubli, lockal, sergey
Differential Revision: https://developer.blender.org/D1543
2018-05-27 00:46:37 +02:00
|
|
|
/* Position of `x` relative to the interval from `a` to `b`: 0 at `a`, 1 at `b`.
 * There is no guard against `a == b`. */
ccl_device_inline float inverse_lerp(float a, float b, float x)
{
  const float offset = x - a;
  const float span = b - a;
  return offset / span;
}
|
|
|
|
|
|
|
|
|
|
/* Cubic interpolation between b and c, a and d are the previous and next point. */
|
|
|
|
|
ccl_device_inline float cubic_interp(float a, float b, float c, float d, float x)
{
  /* Polynomial coefficients, evaluated in Horner form below. */
  const float cubic = d + 3.0f * (b - c) - a;
  const float quadratic = 2.0f * a - 5.0f * b + 4.0f * c - d;
  const float linear = c - a;
  const float horner = (cubic * x + quadratic) * x + linear;
  return 0.5f * horner * x + b;
}
|
|
|
|
|
|
2017-04-14 14:05:23 +02:00
|
|
|
CCL_NAMESPACE_END
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2021-10-24 14:19:19 +02:00
|
|
|
#include "util/math_int2.h"
|
|
|
|
|
#include "util/math_int3.h"
|
|
|
|
|
#include "util/math_int4.h"
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2021-10-24 14:19:19 +02:00
|
|
|
#include "util/math_float2.h"
|
|
|
|
|
#include "util/math_float3.h"
|
|
|
|
|
#include "util/math_float4.h"
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2021-10-24 14:19:19 +02:00
|
|
|
#include "util/rect.h"
|
Cycles: Improve denoising speed on GPUs with small tile sizes
Previously, the NLM kernels would be launched once per offset with one thread per pixel.
However, with the smaller tile sizes that are now feasible, there wasn't enough work to fully occupy GPUs which results in a significant slowdown.
Therefore, the kernels are now launched in a single call that handles all offsets at once.
This has two downsides: Memory accesses to accumulating buffers are now atomic, and more importantly, the temporary memory now has to be allocated for every shift at once, increasing the required memory.
On the other hand, of course, the smaller tiles significantly reduce the size of the memory.
The main bottleneck right now is the construction of the transformation - there is nothing to be parallelized there, one thread per pixel is the maximum.
I tried to parallelize the SVD implementation by storing the matrix in shared memory and launching one block per pixel, but that wasn't really going anywhere.
To make the new code somewhat readable, the handling of rectangular regions was cleaned up a bit and commented, it should be easier to understand what's going on now.
Also, some variables have been renamed to make the difference between buffer width and stride more apparent, in addition to some general style cleanup.
2017-11-10 04:34:14 +01:00
|
|
|
|
2017-04-14 14:05:23 +02:00
|
|
|
CCL_NAMESPACE_BEGIN
|
2011-04-27 11:58:34 +00:00
|
|
|
|
2021-11-18 14:25:05 +01:00
|
|
|
#if !defined(__KERNEL_METAL__)
|
2017-04-14 14:05:23 +02:00
|
|
|
/* Interpolation */
|
2014-11-08 13:35:21 +01:00
|
|
|
|
2017-04-14 14:05:23 +02:00
|
|
|
/* Linear interpolation `a * (1 - t) + b * t`, with the weight type `B` independent of the
 * value type `A`; the result is cast back to `A`. */
template<class A, class B> A lerp(const A &a, const A &b, const B &t)
{
  const B one_minus_t = (B)1 - t;
  return (A)(a * one_minus_t + b * t);
}
|
|
|
|
|
|
2021-11-18 14:25:05 +01:00
|
|
|
#endif /* __KERNEL_METAL__ */
|
|
|
|
|
|
2017-04-14 14:05:23 +02:00
|
|
|
/* Triangle */
|
2011-04-27 11:58:34 +00:00
|
|
|
|
Cycles: Kernel address space changes for MSL
This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation.
MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness.
The vast majority of deltas in this patch fall into one of two cases:
- Ensuring ccl_private is specified for thread-local pointer types
- Ensuring ccl_global is specified for device-wide pointer types
Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant.
In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture.
The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation.
Ref T92212
Reviewed By: brecht
Maniphest Tasks: T92212
Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
|
|
|
/* Area of the triangle (v1, v2, v3): half the length of the cross product of the two edges
 * that start at `v2`. */
ccl_device_inline float triangle_area(ccl_private const float3 &v1,
                                      ccl_private const float3 &v2,
                                      ccl_private const float3 &v3)
{
  const float3 edge_to_v3 = v3 - v2;
  const float3 edge_to_v1 = v1 - v2;
  return len(cross(edge_to_v3, edge_to_v1)) * 0.5f;
}
|
|
|
|
|
|
2011-09-27 20:37:24 +00:00
|
|
|
/* Orthonormal vectors */
|
|
|
|
|
|
Cycles: Kernel address space changes for MSL
This is the first of a sequence of changes to support compiling Cycles kernels as MSL (Metal Shading Language) in preparation for a Metal GPU device implementation.
MSL requires that all pointer types be declared with explicit address space attributes (device, thread, etc...). There is already precedent for this with Cycles' address space macros (ccl_global, ccl_private, etc...), therefore the first step of MSL-enablement is to apply these consistently. Line-for-line this represents the largest change required to enable MSL. Applying this change first will simplify future patches as well as offering the emergent benefit of enhanced descriptiveness.
The vast majority of deltas in this patch fall into one of two cases:
- Ensuring ccl_private is specified for thread-local pointer types
- Ensuring ccl_global is specified for device-wide pointer types
Additionally, the ccl_addr_space qualifier can be removed. Prior to Cycles X, ccl_addr_space was used as a context-dependent address space qualifier, but now it is either redundant (e.g. in struct typedefs), or can be replaced by ccl_global in the case of pointer types. Associated function variants (e.g. lcg_step_float_addrspace) are also redundant.
In cases where address space qualifiers are chained with "const", this patch places the address space qualifier first. The rationale for this is that the choice of address space is likely to have the greater impact on runtime performance and overall architecture.
The final part of this patch is the addition of a metal/compat.h header. This is partially complete and will be extended in future patches, paving the way for the full Metal implementation.
Ref T92212
Reviewed By: brecht
Maniphest Tasks: T92212
Differential Revision: https://developer.blender.org/D12864
2021-10-14 13:53:40 +01:00
|
|
|
/* Build two vectors perpendicular to `N`: `*a` is a normalized cross product of a fixed helper
 * vector with `N`, and `*b` is `cross(N, *a)`. */
ccl_device_inline void make_orthonormals(const float3 N,
                                         ccl_private float3 *a,
                                         ccl_private float3 *b)
{
#if 0
  if (fabsf(N.y) >= 0.999f) {
    *a = make_float3(1, 0, 0);
    *b = make_float3(0, 0, 1);
    return;
  }
  if (fabsf(N.z) >= 0.999f) {
    *a = make_float3(1, 0, 0);
    *b = make_float3(0, 1, 0);
    return;
  }
#endif

  /* (1, 1, 1) x N vanishes when all components of N are equal, so that case uses the helper
   * vector (-1, 1, 1) instead. */
  const bool components_differ = (N.x != N.y || N.x != N.z);
  const float3 tangent = components_differ ?
                             make_float3(N.z - N.y, N.x - N.z, N.y - N.x) :  /* (1, 1, 1) x N */
                             make_float3(N.z - N.y, N.x + N.z, -N.y - N.x); /* (-1, 1, 1) x N */

  *a = normalize(tangent);
  *b = cross(N, *a);
}
|
|
|
|
|
|
2012-03-28 12:18:12 +00:00
|
|
|
/* Color division */
|
|
|
|
|
|
2014-03-29 13:03:50 +01:00
|
|
|
/* Per-channel reciprocal of `a`; a channel that is exactly zero yields zero instead of
 * dividing by zero. */
ccl_device_inline float3 safe_invert_color(float3 a)
{
  const float inv_x = (a.x == 0.0f) ? 0.0f : 1.0f / a.x;
  const float inv_y = (a.y == 0.0f) ? 0.0f : 1.0f / a.y;
  const float inv_z = (a.z == 0.0f) ? 0.0f : 1.0f / a.z;

  return make_float3(inv_x, inv_y, inv_z);
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Per-channel a / b; any channel whose divisor is exactly zero becomes zero. */
ccl_device_inline float3 safe_divide_color(float3 a, float3 b)
{
  const float qx = (b.x == 0.0f) ? 0.0f : a.x / b.x;
  const float qy = (b.y == 0.0f) ? 0.0f : a.y / b.y;
  const float qz = (b.z == 0.0f) ? 0.0f : a.z / b.z;

  return make_float3(qx, qy, qz);
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Per-channel a / b that fills channels with a zero divisor from the remaining ones.
 *
 * Channels with a non-zero divisor are divided normally. A channel whose divisor is zero is
 * replaced by the single valid quotient (two zero divisors) or by the average of the two valid
 * quotients (one zero divisor). If all divisors are zero the result is zero. */
ccl_device_inline float3 safe_divide_even_color(float3 a, float3 b)
{
  const bool zero_x = (b.x == 0.0f);
  const bool zero_y = (b.y == 0.0f);
  const bool zero_z = (b.z == 0.0f);

  float x = zero_x ? 0.0f : a.x / b.x;
  float y = zero_y ? 0.0f : a.y / b.y;
  float z = zero_z ? 0.0f : a.z / b.z;

  /* try to get gray even if b is zero */
  if (zero_x && zero_y) {
    /* Only z can be valid (it is already zero if b.z is zero too). */
    x = z;
    y = z;
  }
  else if (zero_x && zero_z) {
    x = y;
    z = y;
  }
  else if (zero_x) {
    x = 0.5f * (y + z);
  }
  else if (zero_y && zero_z) {
    y = x;
    z = x;
  }
  else if (zero_y) {
    y = 0.5f * (x + z);
  }
  else if (zero_z) {
    z = 0.5f * (x + y);
  }

  return make_float3(x, y, z);
}
|
|
|
|
|
|
2012-11-04 22:31:32 +00:00
|
|
|
/* Rotation of point around axis and angle */
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Rotate point `p` about `axis` by `angle` (passed straight to cosf/sinf) using the axis-angle
 * rotation matrix. The axis is not normalized here.
 * NOTE(review): the matrix is a pure rotation only for a unit-length axis; confirm callers. */
ccl_device_inline float3 rotate_around_axis(float3 p, float3 axis, float angle)
{
  const float c = cosf(angle);
  const float s = sinf(angle);
  const float t = 1 - c;

  float3 rotated;

  /* Each component is one row of the rotation matrix applied to p. */
  rotated.x = ((c + t * axis.x * axis.x) * p.x) + ((t * axis.x * axis.y - axis.z * s) * p.y) +
              ((t * axis.x * axis.z + axis.y * s) * p.z);

  rotated.y = ((t * axis.x * axis.y + axis.z * s) * p.x) + ((c + t * axis.y * axis.y) * p.y) +
              ((t * axis.y * axis.z - axis.x * s) * p.z);

  rotated.z = ((t * axis.x * axis.z - axis.y * s) * p.x) +
              ((t * axis.y * axis.z + axis.x * s) * p.y) + ((c + t * axis.z * axis.z) * p.z);

  return rotated;
}
|
|
|
|
|
|
2012-12-19 21:17:16 +00:00
|
|
|
/* NaN-safe math ops */
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Square root with negative inputs clamped to zero before the call. */
ccl_device_inline float safe_sqrtf(float f)
{
  const float non_negative = max(f, 0.0f);
  return sqrtf(non_negative);
}
|
|
|
|
|
|
Maths Node: Additional functions
When creating shaders and using maths functions it is expected that Blender should match functions in other DCC applications, game engines and shading languages such as GLSL and OSL.
This patch adds missing functions to the Blender maths node.
Ideally, it would be nice to have these functions available to vectors too but that is not part of this patch.
This patch adds the following functions trunc, snap, wrap, compare, pingpong, sign, radians, degrees, cosh, sinh, tanh, exp, smoothmin and inversesqrt.
Sign function is based on GLSL and OSL functions and returns zero when x == 0.
Differential Revision: https://developer.blender.org/D5957
2019-12-05 23:02:05 +00:00
|
|
|
/* Reciprocal square root that returns zero for any input that is not strictly positive. */
ccl_device_inline float inversesqrtf(float f)
{
  /* Zero, negative and NaN inputs all fail this comparison. */
  if (!(f > 0.0f)) {
    return 0.0f;
  }

#if defined(__KERNEL_METAL__)
  return rsqrt(f);
#else
  return 1.0f / sqrtf(f);
#endif
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Arc sine with the argument clamped to [-1, 1] first. */
ccl_device float safe_asinf(float a)
{
  const float in_domain = clamp(a, -1.0f, 1.0f);
  return asinf(in_domain);
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Arc cosine with the argument clamped to [-1, 1] first. */
ccl_device float safe_acosf(float a)
{
  const float in_domain = clamp(a, -1.0f, 1.0f);
  return acosf(in_domain);
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* powf() wrapper. On GPU builds the special cases are handled by hand before calling powf();
 * on other builds this is a plain powf() call. */
ccl_device float compatible_powf(float x, float y)
{
#ifdef __KERNEL_GPU__
  /* x^0 -> 1, including 0^0 */
  if (y == 0.0f) {
    return 1.0f;
  }

  if (x == 0.0f) {
    return 0.0f;
  }

  /* GPU pow doesn't accept negative x, do manual checks here */
  if (x < 0.0f) {
    const float magnitude = powf(-x, y);
    /* An exponent that is a multiple of two gives a positive result, anything else negative. */
    const bool even_exponent = (fmodf(-y, 2.0f) == 0.0f);
    return even_exponent ? magnitude : -magnitude;
  }
#endif

  return powf(x, y);
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* Power function that returns zero for a negative base with a non-integer exponent, and
 * defers to compatible_powf() otherwise. */
ccl_device float safe_powf(float a, float b)
{
  if (a < 0.0f) {
    /* The integer check is only evaluated for negative bases. */
    if (UNLIKELY(b != float_to_int(b))) {
      return 0.0f;
    }
  }

  return compatible_powf(a, b);
}
|
|
|
|
|
|
2017-05-07 09:16:14 -06:00
|
|
|
/* a / b, or zero when b is exactly zero. */
ccl_device float safe_divide(float a, float b)
{
  if (b == 0.0f) {
    return 0.0f;
  }
  return a / b;
}
|
|
|
|
|
|
2017-05-07 09:16:14 -06:00
|
|
|
/* Logarithm of `a` in base `b`, computed as logf(a) / logf(b).
 * Returns zero when either argument is not strictly positive, or when logf(b) is zero. */
ccl_device float safe_logf(float a, float b)
{
  const bool outside_domain = (a <= 0.0f) || (b <= 0.0f);
  if (UNLIKELY(outside_domain)) {
    return 0.0f;
  }

  const float log_value = logf(a);
  const float log_base = logf(b);
  return safe_divide(log_value, log_base);
}
|
|
|
|
|
|
2013-11-16 00:17:10 +01:00
|
|
|
/* fmodf(a, b), or zero when b is exactly zero. */
ccl_device float safe_modulo(float a, float b)
{
  if (b == 0.0f) {
    return 0.0f;
  }
  return fmodf(a, b);
}
|
2017-11-05 21:43:23 +01:00
|
|
|
|
|
|
|
|
/* Square of `a`. */
ccl_device_inline float sqr(float a)
{
  const float squared = a * a;
  return squared;
}
|
2013-05-20 14:38:47 +00:00
|
|
|
|
2018-07-18 11:14:43 +02:00
|
|
|
/* a^20 by repeated squaring: a^4 -> a^5 -> a^10 -> a^20. */
ccl_device_inline float pow20(float a)
{
  const float a5 = sqr(sqr(a)) * a;
  const float a10 = sqr(a5);
  return sqr(a10);
}
|
|
|
|
|
|
|
|
|
|
/* a^22 by repeated squaring: a^4 -> a^5 -> a^10 -> a^11 -> a^22. */
ccl_device_inline float pow22(float a)
{
  const float a5 = sqr(sqr(a)) * a;
  const float a10 = sqr(a5);
  const float a11 = a * a10;
  return sqr(a11);
}
|
|
|
|
|
|
2021-12-07 15:11:35 +00:00
|
|
|
#ifdef __KERNEL_METAL__
/* Approximate natural logarithm of the gamma function, compiled for Metal kernels only.
 *
 * Evaluates the closed form
 *   2 ln(Gamma(x)) ~= ln(2 pi) - ln(x) + x * (2 ln(x) + ln(x sinh(1/x) + 1 / (810 x^6)) - 2)
 *
 * The 1/810 coefficient is what makes the expansion of this expression agree with the Stirling
 * series up to and including its 1/(1260 x^5) term; -1/180 only shows up as an intermediate
 * coefficient of the 1/x^4 term of ln(x sinh(1/x)) and is not the correction constant. */
ccl_device_inline float lgammaf(float x)
{
  /* Nemes, Gergő (2010), "New asymptotic expansion for the Gamma function", Archiv der Mathematik
   */
  const float inv_810 = 1.0f / 810.0f;
  const float log2pi = 1.83787706641f;
  const float logx = log(x);
  return (log2pi - logx +
          x * (logx * 2.0f + log(x * sinh(1.0f / x) + (inv_810 / pow(x, 6.0f))) - 2.0f)) *
         0.5f;
}
#endif
|
|
|
|
|
|
Cycles: Add multi-scattering, energy-conserving GGX as an option to the Glossy, Anisotropic and Glass BSDFs
This commit adds a new distribution to the Glossy, Anisotropic and Glass BSDFs that implements the
multiple-scattering microfacet model described in the paper "Multiple-Scattering Microfacet BSDFs with the Smith Model".
Essentially, the improvement is that unlike classical GGX, which only models single scattering and assumes
the contribution of multiple bounces to be zero, this new model performs a random walk on the microsurface until
the ray leaves it again, which ensures perfect energy conservation.
In practice, this means that the "darkening problem" - GGX materials becoming darker with increasing
roughness - is solved in a physically correct and efficient way.
The downside of this model is that it has no (known) analytic expression for evaluation. However, it can be
evaluated stochastically, and although the correct PDF isn't known either, the properties of MIS and the
balance heuristic guarantee an unbiased result at the cost of slightly higher noise.
Reviewers: dingto, #cycles, brecht
Reviewed By: dingto, #cycles, brecht
Subscribers: bliblubli, ace_dragon, gregzaal, brecht, harvester, dingto, marcog, swerner, jtheninja, Blendify, nutel
Differential Revision: https://developer.blender.org/D2002
2016-06-23 22:56:43 +02:00
|
|
|
/* Beta function evaluated in log space: exp(lgamma(x) + lgamma(y) - lgamma(x + y)). */
ccl_device_inline float beta(float x, float y)
{
  const float log_gamma_x = lgammaf(x);
  const float log_gamma_y = lgammaf(y);
  const float log_gamma_sum = lgammaf(x + y);

  return expf(log_gamma_x + log_gamma_y - log_gamma_sum);
}
|
|
|
|
|
|
2017-03-23 12:55:51 +01:00
|
|
|
/* XOR the bit pattern of `x` with `y` and reinterpret the result as a float.
 * NOTE(review): going by the name, `y` is presumably a sign-bit mask; nothing here enforces
 * that. */
ccl_device_inline float xor_signmask(float x, int y)
{
  const int x_bits = __float_as_int(x);
  return __int_as_float(x_bits ^ y);
}
|
|
|
|
|
|
2018-07-14 15:38:58 +02:00
|
|
|
/* Scale a 32-bit unsigned integer by 1 / 0xFFFFFFFF, mapping the full integer range onto
 * [0, 1]. */
ccl_device float bits_to_01(uint bits)
{
  const float scale = 1.0f / (float)0xFFFFFFFF;
  return bits * scale;
}
|
|
|
|
|
|
2021-11-18 14:25:05 +01:00
|
|
|
/* popcount(x): number of set bits in `x`.
 *
 * - CPU, GCC-compatible compilers: compiler builtin.
 * - CPU, other compilers: portable bit-parallel fallback below.
 * - HIP: 64-bit intrinsic. Other non-Metal GPU back-ends: 32-bit intrinsic.
 * - Metal: nothing is defined here (presumably the language's own popcount is used). */
#if !defined(__KERNEL_GPU__)
#  if defined(__GNUC__)
#    define popcount(x) __builtin_popcount(x)
#  else
ccl_device_inline uint popcount(uint x)
{
  /* TODO(Stefan): pop-count intrinsic for Windows with fallback for older CPUs. */

  /* Sum adjacent bits into 2-bit fields. The whole input must take part: masking it first or
   * reducing the final sum would no longer yield the number of set bits. */
  uint i = x - ((x >> 1) & 0x55555555);
  /* Sum adjacent 2-bit fields into 4-bit fields. */
  i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
  /* Sum nibbles into bytes, then add the four bytes together via the multiply; the total ends
   * up in the top byte. */
  i = (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
  return i;
}
#  endif
#elif defined(__KERNEL_HIP__)
/* Use popcll to support 64-bit wave for pre-RDNA AMD GPUs */
#  define popcount(x) __popcll(x)
#elif !defined(__KERNEL_METAL__)
#  define popcount(x) __popc(x)
#endif
|
|
|
|
|
|
2019-08-26 15:08:46 +02:00
|
|
|
/* Number of zero bits above the highest set bit of `x`.
 * The CPU code paths assert that `x` is non-zero; the GPU paths call the device intrinsic
 * directly. */
ccl_device_inline uint count_leading_zeros(uint x)
{
#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
  return __clz(x);
#elif defined(__KERNEL_METAL__)
  return clz(x);
#else
  assert(x != 0);
#  ifdef _MSC_VER
  /* _BitScanReverse stores the index of the highest set bit, counted from bit 0. */
  unsigned long highest_set_bit = 0;
  _BitScanReverse(&highest_set_bit, x);
  return (31 - highest_set_bit);
#  else
  return __builtin_clz(x);
#  endif
#endif
}
|
|
|
|
|
|
|
|
|
|
/* Number of zero bits below the lowest set bit of `x`.
 * The CPU code paths assert that `x` is non-zero. */
ccl_device_inline uint count_trailing_zeros(uint x)
{
#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
  /* __ffs() is one-based, hence the subtraction. */
  return (__ffs(x) - 1);
#elif defined(__KERNEL_METAL__)
  return ctz(x);
#else
  assert(x != 0);
#  ifdef _MSC_VER
  /* _BitScanForward stores the index of the lowest set bit, counted from bit 0. */
  unsigned long lowest_set_bit = 0;
  _BitScanForward(&lowest_set_bit, x);
  return lowest_set_bit;
#  else
  return __builtin_ctz(x);
#  endif
#endif
}
|
|
|
|
|
|
|
|
|
|
/* One-based index of the lowest set bit of `x`. The Metal and MSVC paths return 0 explicitly
 * when `x` is zero; the other paths forward to the platform's ffs intrinsic. */
ccl_device_inline uint find_first_set(uint x)
{
#if defined(__KERNEL_CUDA__) || defined(__KERNEL_OPTIX__) || defined(__KERNEL_HIP__)
  return __ffs(x);
#elif defined(__KERNEL_METAL__)
  return (x != 0) ? ctz(x) + 1 : 0;
#else
#  ifdef _MSC_VER
  if (x == 0) {
    return 0;
  }
  /* x & -x keeps only the lowest set bit. */
  const uint lowest_bit = x & (-x);
  return (32 - count_leading_zeros(lowest_bit));
#  else
  return __builtin_ffs(x);
#  endif
#endif
}
|
|
|
|
|
|
2014-07-29 16:07:05 +06:00
|
|
|
/* projections */
|
2015-02-19 12:52:48 +05:00
|
|
|
/* Tube projection of `co` around the Z axis.
 * u comes from the angle of (x, y) and v is z remapped as (z + 1) / 2. Points with no XY
 * extent map to (0, 0). */
ccl_device_inline float2 map_to_tube(const float3 co)
{
  const float radius = sqrtf(co.x * co.x + co.y * co.y);

  /* No direction in the XY plane to take an angle from. */
  if (!(radius > 0.0f)) {
    return make_float2(0.0f, 0.0f);
  }

  const float u = (1.0f - (atan2f(co.x / radius, co.y / radius) / M_PI_F)) * 0.5f;
  const float v = (co.z + 1.0f) * 0.5f;
  return make_float2(u, v);
}
|
|
|
|
|
|
2015-02-19 12:52:48 +05:00
|
|
|
/* Spherical projection of `co`.
 * u comes from the angle of (x, y), v from the angle between `co` and the Z axis. The zero
 * vector maps to (0, 0). */
ccl_device_inline float2 map_to_sphere(const float3 co)
{
  const float length = len(co);

  if (!(length > 0.0f)) {
    return make_float2(0.0f, 0.0f);
  }

  /* On the Z axis the angle is forced to zero, otherwise atan2f would hit a domain error. */
  const float u = UNLIKELY(co.x == 0.0f && co.y == 0.0f) ?
                      0.0f :
                      (1.0f - atan2f(co.x, co.y) / M_PI_F) / 2.0f;
  const float v = 1.0f - safe_acosf(co.z / length) / M_PI_F;

  return make_float2(u, v);
}
|
|
|
|
|
|
2019-05-03 11:23:16 +02:00
|
|
|
/* Compares two floats.
|
|
|
|
|
* Returns true if their absolute difference is smaller than abs_diff (for numbers near zero)
|
|
|
|
|
* or their relative difference is less than ulp_diff ULPs.
|
2019-05-06 11:59:03 +10:00
|
|
|
* Based on
|
|
|
|
|
* https://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
|
2019-05-03 11:23:16 +02:00
|
|
|
*/
|
|
|
|
|
|
2021-11-17 17:22:05 +01:00
|
|
|
ccl_device_inline bool compare_floats(float a, float b, float abs_diff, int ulp_diff)
|
2019-05-03 11:23:16 +02:00
|
|
|
{
|
|
|
|
|
if (fabsf(a - b) < abs_diff) {
|
|
|
|
|
return true;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ((a < 0.0f) != (b < 0.0f)) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return (abs(__float_as_int(a) - __float_as_int(b)) < ulp_diff);
|
|
|
|
|
}
|
|
|
|
|
|
2020-07-08 02:10:02 +02:00
|
|
|
/* Calculate the angle between the two vectors a and b.
|
2021-04-08 16:20:22 +10:00
|
|
|
* The usual approach `acos(dot(a, b))` has severe precision issues for small angles,
|
2020-07-08 02:10:02 +02:00
|
|
|
* which are avoided by this method.
|
|
|
|
|
* Based on "Mangled Angles" from https://people.eecs.berkeley.edu/~wkahan/Mindless.pdf
|
|
|
|
|
*/
|
|
|
|
|
ccl_device_inline float precise_angle(float3 a, float3 b)
|
|
|
|
|
{
|
|
|
|
|
return 2.0f * atan2f(len(a - b), len(a + b));
|
|
|
|
|
}
|
|
|
|
|
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
/* Return value which is greater than the given one and is a power of two. */
|
|
|
|
|
ccl_device_inline uint next_power_of_two(uint x)
|
|
|
|
|
{
|
|
|
|
|
return x == 0 ? 1 : 1 << (32 - count_leading_zeros(x));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Return value which is lower than the given one and is a power of two. */
|
|
|
|
|
ccl_device_inline uint prev_power_of_two(uint x)
|
|
|
|
|
{
|
|
|
|
|
return x < 2 ? x : 1 << (31 - count_leading_zeros(x - 1));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Compilers that do not provide __has_builtin get a stub that reports every builtin as
 * unavailable, so `#if __has_builtin(...)` checks remain valid everywhere. */
#if !defined(__has_builtin)
#  define __has_builtin(v) 0
#endif
|
|
|
|
|
|
|
|
|
|
/* Reverses the bits of a 32 bit integer. */
|
|
|
|
|
ccl_device_inline uint32_t reverse_integer_bits(uint32_t x)
|
|
|
|
|
{
|
|
|
|
|
/* Use a native instruction if it exists. */
|
2022-02-09 10:44:03 +01:00
|
|
|
#if defined(__aarch64__) || defined(_M_ARM64)
|
|
|
|
|
/* Assume the rbit is always available on 64bit ARM architecture. */
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
__asm__("rbit %w0, %w1" : "=r"(x) : "r"(x));
|
|
|
|
|
return x;
|
2022-02-09 10:44:03 +01:00
|
|
|
#elif defined(__arm__) && ((__ARM_ARCH > 7) || __ARM_ARCH == 6 && __ARM_ARCH_ISA_THUMB >= 2)
|
|
|
|
|
/* This ARM instruction is available in ARMv6T2 and above.
|
|
|
|
|
* This 32-bit Thumb instruction is available in ARMv6T2 and above. */
|
|
|
|
|
__asm__("rbit %0, %1" : "=r"(x) : "r"(x));
|
|
|
|
|
return x;
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
#elif defined(__KERNEL_CUDA__)
|
|
|
|
|
return __brev(x);
|
2021-11-18 14:25:05 +01:00
|
|
|
#elif defined(__KERNEL_METAL__)
|
|
|
|
|
return reverse_bits(x);
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
#elif __has_builtin(__builtin_bitreverse32)
|
|
|
|
|
return __builtin_bitreverse32(x);
|
|
|
|
|
#else
|
|
|
|
|
/* Flip pairwise. */
|
|
|
|
|
x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1);
|
|
|
|
|
/* Flip pairs. */
|
|
|
|
|
x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2);
|
|
|
|
|
/* Flip nibbles. */
|
|
|
|
|
x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4);
|
|
|
|
|
/* Flip bytes. CPUs have an instruction for that, pretty fast one. */
|
|
|
|
|
# ifdef _MSC_VER
|
|
|
|
|
return _byteswap_ulong(x);
|
|
|
|
|
# elif defined(__INTEL_COMPILER)
|
|
|
|
|
return (uint32_t)_bswap((int)x);
|
|
|
|
|
# else
|
|
|
|
|
/* Assuming gcc or clang. */
|
|
|
|
|
return __builtin_bswap32(x);
|
|
|
|
|
# endif
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
2011-04-27 11:58:34 +00:00
|
|
|
CCL_NAMESPACE_END
|
|
|
|
|
|
|
|
|
|
#endif /* __UTIL_MATH_H__ */
|