Refactor: Cycles: Rename rcp to reciprocal

Rename `rcp` to `reciprocal` to avoid symbol conflicts with upcoming HIP
changes. Also remove the unused `rcp` implementations for float4 and vfloat8.

Pull Request: https://projects.blender.org/blender/blender/pulls/134045
This commit is contained in:
Brecht Van Lommel
2025-02-04 18:59:24 +01:00
committed by Brecht Van Lommel
parent 7b976eb810
commit f80f97ca0d
6 changed files with 16 additions and 49 deletions

View File

@@ -77,7 +77,7 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange &job,
/* compute number of bins to use and precompute scaling factor for binning */
num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size()));
scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins);
scale = reciprocal(cent_bounds_.size()) * make_float3((float)num_bins);
/* initialize binning counter and bounds */
BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */

View File

@@ -496,7 +496,7 @@ ccl_device_inline float3 bvh_clamp_direction(const float3 dir)
ccl_device_inline float3 bvh_inverse_direction(const float3 dir)
{
return rcp(dir);
return reciprocal(dir);
}
/* Transform ray into object space to enter static object in BVH */

View File

@@ -26,14 +26,17 @@ ccl_device_inline float3 one_float3()
return make_float3(1.0f, 1.0f, 1.0f);
}
#if defined(__KERNEL_METAL__)
ccl_device_inline float3 rcp(const float3 a)
ccl_device_inline float3 reciprocal(const float3 a)
{
#ifdef __KERNEL_SSE__
/* Don't use _mm_rcp_ps due to poor precision. */
return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
#else
return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z);
#endif
}
#else
#ifndef __KERNEL_METAL__
ccl_device_inline float3 operator-(const float3 &a)
{
@@ -347,16 +350,6 @@ ccl_device_inline float3 mix(const float3 a, const float3 b, const float t)
return a + t * (b - a);
}
ccl_device_inline float3 rcp(const float3 a)
{
# ifdef __KERNEL_SSE__
/* Don't use _mm_rcp_ps due to poor precision. */
return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
# else
return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z);
# endif
}
ccl_device_inline float3 saturate(const float3 a)
{
return make_float3(saturatef(a.x), saturatef(a.y), saturatef(a.z));

View File

@@ -401,16 +401,6 @@ ccl_device_inline float distance(const float4 a, const float4 b)
return len(a - b);
}
ccl_device_inline float4 rcp(const float4 a)
{
# ifdef __KERNEL_SSE__
/* Don't use _mm_rcp_ps due to poor precision. */
return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
# else
return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w);
# endif
}
ccl_device_inline float4 sqrt(const float4 a)
{
# ifdef __KERNEL_SSE__

View File

@@ -168,22 +168,6 @@ ccl_device_inline vfloat8 operator^(const vfloat8 a, const vfloat8 b)
#endif
}
ccl_device_inline vfloat8 rcp(const vfloat8 a)
{
#ifdef __KERNEL_AVX__
return vfloat8(_mm256_rcp_ps(a.m256));
#else
return make_vfloat8(1.0f / a.a,
1.0f / a.b,
1.0f / a.c,
1.0f / a.d,
1.0f / a.e,
1.0f / a.f,
1.0f / a.g,
1.0f / a.h);
#endif
}
ccl_device_inline vfloat8 sqrt(const vfloat8 a)
{
#ifdef __KERNEL_AVX__

View File

@@ -113,16 +113,16 @@ ccl_device bool ray_disk_intersect(const float3 ray_P,
}
/* Custom rcp, cross and dot implementations that match Embree bit for bit. */
ccl_device_forceinline float ray_triangle_rcp(const float x)
ccl_device_forceinline float ray_triangle_reciprocal(const float x)
{
#ifdef __KERNEL_NEON__
/* Move scalar to vector register and do rcp. */
__m128 a = {0};
a = vsetq_lane_f32(x, a, 0);
float32x4_t reciprocal = vrecpeq_f32(a);
reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
return vgetq_lane_f32(reciprocal, 0);
float32x4_t rt_rcp = vrecpeq_f32(a);
rt_rcp = vmulq_f32(vrecpsq_f32(a, rt_rcp), rt_rcp);
rt_rcp = vmulq_f32(vrecpsq_f32(a, rt_rcp), rt_rcp);
return vgetq_lane_f32(rt_rcp, 0);
#elif defined(__KERNEL_SSE__)
const __m128 a = _mm_set_ss(x);
const __m128 r = _mm_rcp_ss(a);
@@ -214,7 +214,7 @@ ccl_device_forceinline bool ray_triangle_intersect(const float3 ray_P,
return false;
}
const float rcp_uvw = (fabsf(UVW) < 1e-18f) ? 0.0f : ray_triangle_rcp(UVW);
const float rcp_uvw = (fabsf(UVW) < 1e-18f) ? 0.0f : ray_triangle_reciprocal(UVW);
*isect_u = min(U * rcp_uvw, 1.0f);
*isect_v = min(V * rcp_uvw, 1.0f);
*isect_t = t;
@@ -339,7 +339,7 @@ ccl_device bool ray_aabb_intersect(const float3 bbox_min,
const float3 ray_D,
ccl_private Interval<float> *t_range)
{
const float3 inv_ray_D = rcp(ray_D);
const float3 inv_ray_D = reciprocal(ray_D);
/* Absolute distances to lower and upper box coordinates. */
const float3 t_lower = (bbox_min - ray_P) * inv_ray_D;