Cleanup: Cycles: add more float3 util functions
and vectorize `wrap` and `safe_fmod`.
This commit is contained in:
committed by
Weizhen Huang
parent
48777385c2
commit
345d23bff8
@@ -76,10 +76,10 @@ ccl_device void svm_vector_math(ccl_private float *value,
|
||||
*vector = ceil(a);
|
||||
break;
|
||||
case NODE_VECTOR_MATH_MODULO:
|
||||
*vector = make_float3(safe_modulo(a.x, b.x), safe_modulo(a.y, b.y), safe_modulo(a.z, b.z));
|
||||
*vector = safe_fmod(a, b);
|
||||
break;
|
||||
case NODE_VECTOR_MATH_WRAP:
|
||||
*vector = make_float3(wrapf(a.x, b.x, c.x), wrapf(a.y, b.y, c.y), wrapf(a.z, b.z, c.z));
|
||||
*vector = wrap(a, b, c);
|
||||
break;
|
||||
case NODE_VECTOR_MATH_FRACTION:
|
||||
*vector = a - floor(a);
|
||||
@@ -88,10 +88,10 @@ ccl_device void svm_vector_math(ccl_private float *value,
|
||||
*vector = fabs(a);
|
||||
break;
|
||||
case NODE_VECTOR_MATH_POWER:
|
||||
*vector = make_float3(safe_powf(a.x, b.x), safe_powf(a.y, b.y), safe_powf(a.z, b.z));
|
||||
*vector = safe_pow(a, b);
|
||||
break;
|
||||
case NODE_VECTOR_MATH_SIGN:
|
||||
*vector = make_float3(compatible_signf(a.x), compatible_signf(a.y), compatible_signf(a.z));
|
||||
*vector = compatible_sign(a);
|
||||
break;
|
||||
case NODE_VECTOR_MATH_MINIMUM:
|
||||
*vector = min(a, b);
|
||||
@@ -100,13 +100,13 @@ ccl_device void svm_vector_math(ccl_private float *value,
|
||||
*vector = max(a, b);
|
||||
break;
|
||||
case NODE_VECTOR_MATH_SINE:
|
||||
*vector = make_float3(sinf(a.x), sinf(a.y), sinf(a.z));
|
||||
*vector = sin(a);
|
||||
break;
|
||||
case NODE_VECTOR_MATH_COSINE:
|
||||
*vector = make_float3(cosf(a.x), cosf(a.y), cosf(a.z));
|
||||
*vector = cos(a);
|
||||
break;
|
||||
case NODE_VECTOR_MATH_TANGENT:
|
||||
*vector = make_float3(tanf(a.x), tanf(a.y), tanf(a.z));
|
||||
*vector = tan(a);
|
||||
break;
|
||||
default:
|
||||
*vector = zero_float3();
|
||||
|
||||
@@ -347,6 +347,20 @@ ccl_device_inline float3 fmod(const float3 a, const float b)
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 fmod(const float3 a, const float3 b)
|
||||
{
|
||||
# if defined(__KERNEL_NEON__)
|
||||
const float32x4_t iquot = vrndq_f32(vdivq_f32(a.m128, b.m128));
|
||||
return float3(vsubq_f32(a, vmulq_f32(iquot, b.m128)));
|
||||
# elif defined(__KERNEL_SSE42__) && defined(__KERNEL_SSE__)
|
||||
const __m128 div = _mm_div_ps(a.m128, b.m128);
|
||||
const __m128 iquot = _mm_round_ps(div, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
|
||||
return float3(_mm_sub_ps(a.m128, _mm_mul_ps(iquot, b.m128)));
|
||||
# else
|
||||
return make_float3(fmodf(a.x, b.x), fmodf(a.y, b.y), fmodf(a.z, b.z));
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 sqrt(const float3 a)
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
@@ -394,11 +408,21 @@ ccl_device_inline float3 log(const float3 v)
|
||||
return make_float3(logf(v.x), logf(v.y), logf(v.z));
|
||||
}
|
||||
|
||||
ccl_device_inline float3 sin(const float3 v)
|
||||
{
|
||||
return make_float3(sinf(v.x), sinf(v.y), sinf(v.z));
|
||||
}
|
||||
|
||||
ccl_device_inline float3 cos(const float3 v)
|
||||
{
|
||||
return make_float3(cosf(v.x), cosf(v.y), cosf(v.z));
|
||||
}
|
||||
|
||||
ccl_device_inline float3 tan(const float3 v)
|
||||
{
|
||||
return make_float3(tanf(v.x), tanf(v.y), tanf(v.z));
|
||||
}
|
||||
|
||||
ccl_device_inline float3 atan2(const float3 y, const float3 x)
|
||||
{
|
||||
return make_float3(atan2f(y.x, x.x), atan2f(y.y, x.y), atan2f(y.z, x.z));
|
||||
@@ -547,6 +571,49 @@ ccl_device_inline float3 power(const float3 v, const float e)
|
||||
return make_float3(powf(v.x, e), powf(v.y, e), powf(v.z, e));
|
||||
}
|
||||
|
||||
ccl_device_inline float3 safe_pow(const float3 a, const float3 b)
|
||||
{
|
||||
return make_float3(safe_powf(a.x, b.x), safe_powf(a.y, b.y), safe_powf(a.z, b.z));
|
||||
}
|
||||
|
||||
ccl_device_inline auto component_wise_equal(const float3 a, const float3 b)
|
||||
{
|
||||
#if defined(__KERNEL_METAL__)
|
||||
return a == b;
|
||||
#elif defined __KERNEL_NEON__
|
||||
return int3(vreinterpretq_m128i_s32(vceqq_f32(a.m128, b.m128)));
|
||||
#elif defined(__KERNEL_SSE__)
|
||||
return int3(_mm_castps_si128(_mm_cmpeq_ps(a.m128, b.m128)));
|
||||
#else
|
||||
return make_int3(a.x == b.x, a.y == b.y, a.z == b.z);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline auto component_is_zero(const float3 a)
|
||||
{
|
||||
return component_wise_equal(a, zero_float3());
|
||||
}
|
||||
|
||||
ccl_device_inline float3 safe_floored_fmod(const float3 a, const float3 b)
|
||||
{
|
||||
return select(component_is_zero(b), zero_float3(), a - floor(a / b) * b);
|
||||
}
|
||||
|
||||
ccl_device_inline float3 wrap(const float3 value, const float3 max, const float3 min)
|
||||
{
|
||||
return safe_floored_fmod(value - min, max - min) + min;
|
||||
}
|
||||
|
||||
ccl_device_inline float3 safe_fmod(const float3 a, const float3 b)
|
||||
{
|
||||
return select(component_is_zero(b), zero_float3(), fmod(a, b));
|
||||
}
|
||||
|
||||
ccl_device_inline float3 compatible_sign(const float3 v)
|
||||
{
|
||||
return make_float3(compatible_signf(v.x), compatible_signf(v.y), compatible_signf(v.z));
|
||||
}
|
||||
|
||||
ccl_device_inline bool isfinite_safe(const float3 v)
|
||||
{
|
||||
return isfinite_safe(v.x) && isfinite_safe(v.y) && isfinite_safe(v.z);
|
||||
|
||||
Reference in New Issue
Block a user