Refactor: Cycles: Rename rcp to reciprocal
Rename `rcp` to `reciprocal` to avoid symbol conflicts with upcoming HIP changes. Also remove the unused `rcp` implementations for float4 and vfloat8. Pull Request: https://projects.blender.org/blender/blender/pulls/134045
This commit is contained in:
Committed by: Brecht Van Lommel
Parent: 7b976eb810
Commit: f80f97ca0d
@@ -77,7 +77,7 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange &job,
|
||||
|
||||
/* compute number of bins to use and precompute scaling factor for binning */
|
||||
num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size()));
|
||||
scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins);
|
||||
scale = reciprocal(cent_bounds_.size()) * make_float3((float)num_bins);
|
||||
|
||||
/* initialize binning counter and bounds */
|
||||
BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */
|
||||
|
||||
@@ -496,7 +496,7 @@ ccl_device_inline float3 bvh_clamp_direction(const float3 dir)
|
||||
|
||||
ccl_device_inline float3 bvh_inverse_direction(const float3 dir)
|
||||
{
|
||||
return rcp(dir);
|
||||
return reciprocal(dir);
|
||||
}
|
||||
|
||||
/* Transform ray into object space to enter static object in BVH */
|
||||
|
||||
@@ -26,14 +26,17 @@ ccl_device_inline float3 one_float3()
|
||||
return make_float3(1.0f, 1.0f, 1.0f);
|
||||
}
|
||||
|
||||
#if defined(__KERNEL_METAL__)
|
||||
|
||||
ccl_device_inline float3 rcp(const float3 a)
|
||||
ccl_device_inline float3 reciprocal(const float3 a)
|
||||
{
|
||||
#ifdef __KERNEL_SSE__
|
||||
/* Don't use _mm_rcp_ps due to poor precision. */
|
||||
return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
|
||||
#else
|
||||
return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z);
|
||||
#endif
|
||||
}
|
||||
|
||||
#else
|
||||
#ifndef __KERNEL_METAL__
|
||||
|
||||
ccl_device_inline float3 operator-(const float3 &a)
|
||||
{
|
||||
@@ -347,16 +350,6 @@ ccl_device_inline float3 mix(const float3 a, const float3 b, const float t)
|
||||
return a + t * (b - a);
|
||||
}
|
||||
|
||||
ccl_device_inline float3 rcp(const float3 a)
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
/* Don't use _mm_rcp_ps due to poor precision. */
|
||||
return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
|
||||
# else
|
||||
return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z);
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline float3 saturate(const float3 a)
|
||||
{
|
||||
return make_float3(saturatef(a.x), saturatef(a.y), saturatef(a.z));
|
||||
|
||||
@@ -401,16 +401,6 @@ ccl_device_inline float distance(const float4 a, const float4 b)
|
||||
return len(a - b);
|
||||
}
|
||||
|
||||
ccl_device_inline float4 rcp(const float4 a)
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
/* Don't use _mm_rcp_ps due to poor precision. */
|
||||
return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
|
||||
# else
|
||||
return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w);
|
||||
# endif
|
||||
}
|
||||
|
||||
ccl_device_inline float4 sqrt(const float4 a)
|
||||
{
|
||||
# ifdef __KERNEL_SSE__
|
||||
|
||||
@@ -168,22 +168,6 @@ ccl_device_inline vfloat8 operator^(const vfloat8 a, const vfloat8 b)
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline vfloat8 rcp(const vfloat8 a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX__
|
||||
return vfloat8(_mm256_rcp_ps(a.m256));
|
||||
#else
|
||||
return make_vfloat8(1.0f / a.a,
|
||||
1.0f / a.b,
|
||||
1.0f / a.c,
|
||||
1.0f / a.d,
|
||||
1.0f / a.e,
|
||||
1.0f / a.f,
|
||||
1.0f / a.g,
|
||||
1.0f / a.h);
|
||||
#endif
|
||||
}
|
||||
|
||||
ccl_device_inline vfloat8 sqrt(const vfloat8 a)
|
||||
{
|
||||
#ifdef __KERNEL_AVX__
|
||||
|
||||
@@ -113,16 +113,16 @@ ccl_device bool ray_disk_intersect(const float3 ray_P,
|
||||
}
|
||||
|
||||
/* Custom rcp, cross and dot implementations that match Embree bit for bit. */
|
||||
ccl_device_forceinline float ray_triangle_rcp(const float x)
|
||||
ccl_device_forceinline float ray_triangle_reciprocal(const float x)
|
||||
{
|
||||
#ifdef __KERNEL_NEON__
|
||||
/* Move scalar to vector register and do rcp. */
|
||||
__m128 a = {0};
|
||||
a = vsetq_lane_f32(x, a, 0);
|
||||
float32x4_t reciprocal = vrecpeq_f32(a);
|
||||
reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
|
||||
reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
|
||||
return vgetq_lane_f32(reciprocal, 0);
|
||||
float32x4_t rt_rcp = vrecpeq_f32(a);
|
||||
rt_rcp = vmulq_f32(vrecpsq_f32(a, rt_rcp), rt_rcp);
|
||||
rt_rcp = vmulq_f32(vrecpsq_f32(a, rt_rcp), rt_rcp);
|
||||
return vgetq_lane_f32(rt_rcp, 0);
|
||||
#elif defined(__KERNEL_SSE__)
|
||||
const __m128 a = _mm_set_ss(x);
|
||||
const __m128 r = _mm_rcp_ss(a);
|
||||
@@ -214,7 +214,7 @@ ccl_device_forceinline bool ray_triangle_intersect(const float3 ray_P,
|
||||
return false;
|
||||
}
|
||||
|
||||
const float rcp_uvw = (fabsf(UVW) < 1e-18f) ? 0.0f : ray_triangle_rcp(UVW);
|
||||
const float rcp_uvw = (fabsf(UVW) < 1e-18f) ? 0.0f : ray_triangle_reciprocal(UVW);
|
||||
*isect_u = min(U * rcp_uvw, 1.0f);
|
||||
*isect_v = min(V * rcp_uvw, 1.0f);
|
||||
*isect_t = t;
|
||||
@@ -339,7 +339,7 @@ ccl_device bool ray_aabb_intersect(const float3 bbox_min,
|
||||
const float3 ray_D,
|
||||
ccl_private Interval<float> *t_range)
|
||||
{
|
||||
const float3 inv_ray_D = rcp(ray_D);
|
||||
const float3 inv_ray_D = reciprocal(ray_D);
|
||||
|
||||
/* Absolute distances to lower and upper box coordinates; */
|
||||
const float3 t_lower = (bbox_min - ray_P) * inv_ray_D;
|
||||
|
||||
Reference in New Issue
Block a user