Refactor: Cycles: Rename rcp to reciprocal

Rename `rcp` to `reciprocal` to avoid symbol conflicts with upcoming HIP changes. Also remove the unused implementations for float4 and float8.

Pull Request: https://projects.blender.org/blender/blender/pulls/134045
2025-02-04 18:59:24 +01:00
parent 7b976eb810
commit f80f97ca0d
6 changed files with 16 additions and 49 deletions
--- a/intern/cycles/bvh/binning.cpp
+++ b/intern/cycles/bvh/binning.cpp
@@ -77,7 +77,7 @@ BVHObjectBinning::BVHObjectBinning(const BVHRange &job,

  /* compute number of bins to use and precompute scaling factor for binning */
  num_bins = min(size_t(MAX_BINS), size_t(4.0f + 0.05f * size()));
-  scale = rcp(cent_bounds_.size()) * make_float3((float)num_bins);
+  scale = reciprocal(cent_bounds_.size()) * make_float3((float)num_bins);

  /* initialize binning counter and bounds */
  BoundBox bin_bounds[MAX_BINS][4]; /* bounds for every bin in every dimension */
--- a/intern/cycles/kernel/geom/object.h
+++ b/intern/cycles/kernel/geom/object.h
@@ -496,7 +496,7 @@ ccl_device_inline float3 bvh_clamp_direction(const float3 dir)

 ccl_device_inline float3 bvh_inverse_direction(const float3 dir)
 {
-  return rcp(dir);
+  return reciprocal(dir);
 }

 /* Transform ray into object space to enter static object in BVH */
--- a/intern/cycles/util/math_float3.h
+++ b/intern/cycles/util/math_float3.h
@@ -26,14 +26,17 @@ ccl_device_inline float3 one_float3()
  return make_float3(1.0f, 1.0f, 1.0f);
 }

-#if defined(__KERNEL_METAL__)
-
-ccl_device_inline float3 rcp(const float3 a)
+ccl_device_inline float3 reciprocal(const float3 a)
 {
+#ifdef __KERNEL_SSE__
+  /* Don't use _mm_rcp_ps due to poor precision. */
+  return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
+#else
  return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z);
+#endif
 }

-#else
+#ifndef __KERNEL_METAL__

 ccl_device_inline float3 operator-(const float3 &a)
 {
@@ -347,16 +350,6 @@ ccl_device_inline float3 mix(const float3 a, const float3 b, const float t)
  return a + t * (b - a);
 }

-ccl_device_inline float3 rcp(const float3 a)
-{
-#  ifdef __KERNEL_SSE__
-  /* Don't use _mm_rcp_ps due to poor precision. */
-  return float3(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
-#  else
-  return make_float3(1.0f / a.x, 1.0f / a.y, 1.0f / a.z);
-#  endif
-}
-
 ccl_device_inline float3 saturate(const float3 a)
 {
  return make_float3(saturatef(a.x), saturatef(a.y), saturatef(a.z));
--- a/intern/cycles/util/math_float4.h
+++ b/intern/cycles/util/math_float4.h
@@ -401,16 +401,6 @@ ccl_device_inline float distance(const float4 a, const float4 b)
  return len(a - b);
 }

-ccl_device_inline float4 rcp(const float4 a)
-{
-#  ifdef __KERNEL_SSE__
-  /* Don't use _mm_rcp_ps due to poor precision. */
-  return float4(_mm_div_ps(_mm_set_ps1(1.0f), a.m128));
-#  else
-  return make_float4(1.0f / a.x, 1.0f / a.y, 1.0f / a.z, 1.0f / a.w);
-#  endif
-}
-
 ccl_device_inline float4 sqrt(const float4 a)
 {
 #  ifdef __KERNEL_SSE__
--- a/intern/cycles/util/math_float8.h
+++ b/intern/cycles/util/math_float8.h
@@ -168,22 +168,6 @@ ccl_device_inline vfloat8 operator^(const vfloat8 a, const vfloat8 b)
 #endif
 }

-ccl_device_inline vfloat8 rcp(const vfloat8 a)
-{
-#ifdef __KERNEL_AVX__
-  return vfloat8(_mm256_rcp_ps(a.m256));
-#else
-  return make_vfloat8(1.0f / a.a,
-                      1.0f / a.b,
-                      1.0f / a.c,
-                      1.0f / a.d,
-                      1.0f / a.e,
-                      1.0f / a.f,
-                      1.0f / a.g,
-                      1.0f / a.h);
-#endif
-}
-
 ccl_device_inline vfloat8 sqrt(const vfloat8 a)
 {
 #ifdef __KERNEL_AVX__
--- a/intern/cycles/util/math_intersect.h
+++ b/intern/cycles/util/math_intersect.h
@@ -113,16 +113,16 @@ ccl_device bool ray_disk_intersect(const float3 ray_P,
 }

 /* Custom rcp, cross and dot implementations that match Embree bit for bit. */
-ccl_device_forceinline float ray_triangle_rcp(const float x)
+ccl_device_forceinline float ray_triangle_reciprocal(const float x)
 {
 #ifdef __KERNEL_NEON__
  /* Move scalar to vector register and do rcp. */
  __m128 a = {0};
  a = vsetq_lane_f32(x, a, 0);
-  float32x4_t reciprocal = vrecpeq_f32(a);
-  reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
-  reciprocal = vmulq_f32(vrecpsq_f32(a, reciprocal), reciprocal);
-  return vgetq_lane_f32(reciprocal, 0);
+  float32x4_t rt_rcp = vrecpeq_f32(a);
+  rt_rcp = vmulq_f32(vrecpsq_f32(a, rt_rcp), rt_rcp);
+  rt_rcp = vmulq_f32(vrecpsq_f32(a, rt_rcp), rt_rcp);
+  return vgetq_lane_f32(rt_rcp, 0);
 #elif defined(__KERNEL_SSE__)
  const __m128 a = _mm_set_ss(x);
  const __m128 r = _mm_rcp_ss(a);
@@ -214,7 +214,7 @@ ccl_device_forceinline bool ray_triangle_intersect(const float3 ray_P,
    return false;
  }

-  const float rcp_uvw = (fabsf(UVW) < 1e-18f) ? 0.0f : ray_triangle_rcp(UVW);
+  const float rcp_uvw = (fabsf(UVW) < 1e-18f) ? 0.0f : ray_triangle_reciprocal(UVW);
  *isect_u = min(U * rcp_uvw, 1.0f);
  *isect_v = min(V * rcp_uvw, 1.0f);
  *isect_t = t;
@@ -339,7 +339,7 @@ ccl_device bool ray_aabb_intersect(const float3 bbox_min,
                                   const float3 ray_D,
                                   ccl_private Interval<float> *t_range)
 {
-  const float3 inv_ray_D = rcp(ray_D);
+  const float3 inv_ray_D = reciprocal(ray_D);

  /* Absolute distances to lower and upper box coordinates; */
  const float3 t_lower = (bbox_min - ray_P) * inv_ray_D;