Fix #136811: HIP-RT performance regression in 4.5

Reduce register pressure and branching in the switch() by making
ShadowPayload a subclass of RayPayload and casting the void* payload
to the base class.

This ensures intersection functions are not inlined multiple times,
bringing performance back.

An alternative could be to avoid functions (they are quite large), but
that only partially resolves the performance regression.

Pull Request: https://projects.blender.org/blender/blender/pulls/136823
This commit is contained in:
Sergey Sharybin
2025-04-01 17:59:44 +02:00
committed by Sergey Sharybin
parent 4499ae4505
commit 36559fd89f
2 changed files with 21 additions and 25 deletions

View File

@@ -181,11 +181,11 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
SET_HIPRT_RAY(ray_hip, ray)
ShadowPayload payload;
payload.ray.kg = kg;
payload.ray.self = ray->self;
payload.ray.visibility = visibility;
payload.ray.prim_type = PRIMITIVE_NONE;
payload.ray.ray_time = ray->time;
payload.kg = kg;
payload.self = ray->self;
payload.visibility = visibility;
payload.prim_type = PRIMITIVE_NONE;
payload.ray_time = ray->time;
payload.in_state = state;
payload.max_hits = max_hits;
payload.num_hits = 0;

View File

@@ -12,13 +12,12 @@ struct RayPayload {
float ray_time;
};
struct ShadowPayload {
/* Some ray types might use the same intersection function for regular and shadow intersections,
* but have different filter functions for them. To make this code simpler essentially subclass
* from RayPayload, but let compiler to possibly shuffle things inside of the payload struct if
* it decides it helps performance. */
RayPayload ray;
/* Some ray types might use the same intersection function for regular and shadow intersections,
* but have different filter functions for them. To make this code simpler subclass from
* RayPayload.
*
* NOTE: This assumes that reinterpret_cast from void pointer to RayPayload works correctly. */
struct ShadowPayload : RayPayload {
int in_state;
uint max_hits;
uint num_hits;
@@ -379,12 +378,12 @@ ccl_device_inline bool shadow_intersection_filter(const hiprtRay &ray,
const hiprtHit &hit)
{
KernelGlobals kg = payload->ray.kg;
KernelGlobals kg = payload->kg;
const uint num_hits = payload->num_hits;
const uint max_hits = payload->max_hits;
const int state = payload->in_state;
const RaySelfPrimitives &self = payload->ray.self;
const RaySelfPrimitives &self = payload->self;
const int object = kernel_data_fetch(user_instance_id, hit.instanceID);
const int prim_offset = kernel_data_fetch(object_prim_offset, object);
@@ -399,7 +398,7 @@ ccl_device_inline bool shadow_intersection_filter(const hiprtRay &ray,
# endif
# ifdef __VISIBILITY_FLAG__
if ((kernel_data_fetch(objects, object).visibility & payload->ray.visibility) == 0) {
if ((kernel_data_fetch(objects, object).visibility & payload->visibility) == 0) {
return true; /* No hit - continue traversal. */
}
# endif
@@ -466,12 +465,12 @@ ccl_device_inline bool shadow_intersection_filter_curves(const hiprtRay &ray,
const hiprtHit &hit)
{
KernelGlobals kg = payload->ray.kg;
KernelGlobals kg = payload->kg;
const uint num_hits = payload->num_hits;
const uint num_recorded_hits = *(payload->r_num_recorded_hits);
const uint max_hits = payload->max_hits;
const RaySelfPrimitives &self = payload->ray.self;
const RaySelfPrimitives &self = payload->self;
const int object = kernel_data_fetch(user_instance_id, hit.instanceID);
const int prim = hit.primID;
@@ -486,7 +485,7 @@ ccl_device_inline bool shadow_intersection_filter_curves(const hiprtRay &ray,
# endif
# ifdef __VISIBILITY_FLAG__
if ((kernel_data_fetch(objects, object).visibility & payload->ray.visibility) == 0) {
if ((kernel_data_fetch(objects, object).visibility & payload->visibility) == 0) {
return true; /* No hit - continue traversal. */
}
# endif
@@ -506,7 +505,7 @@ ccl_device_inline bool shadow_intersection_filter_curves(const hiprtRay &ray,
return true; /* Continue traversal. */
}
const int primitive_type = payload->ray.prim_type;
const int primitive_type = payload->prim_type;
# ifndef __TRANSPARENT_SHADOWS__
return false;
@@ -614,21 +613,18 @@ HIPRT_DEVICE bool intersectFunc(const uint geom_type,
const uint index = tableHeader.numGeomTypes * ray_type + geom_type;
switch (index) {
case Curve_Intersect_Function:
return curve_custom_intersect(ray, (RayPayload *)payload, hit);
case Curve_Intersect_Shadow:
return curve_custom_intersect(ray, &((ShadowPayload *)payload)->ray, hit);
return curve_custom_intersect(ray, (RayPayload *)payload, hit);
case Motion_Triangle_Intersect_Function:
return motion_triangle_custom_intersect(ray, (RayPayload *)payload, hit);
case Motion_Triangle_Intersect_Shadow:
return motion_triangle_custom_intersect(ray, &((ShadowPayload *)payload)->ray, hit);
return motion_triangle_custom_intersect(ray, (RayPayload *)payload, hit);
case Motion_Triangle_Intersect_Local:
return motion_triangle_custom_local_intersect(ray, (LocalPayload *)payload, hit);
case Motion_Triangle_Intersect_Volume:
return motion_triangle_custom_volume_intersect(ray, (RayPayload *)payload, hit);
case Point_Intersect_Function:
return point_custom_intersect(ray, (RayPayload *)payload, hit);
case Point_Intersect_Shadow:
return point_custom_intersect(ray, &((ShadowPayload *)payload)->ray, hit);
return point_custom_intersect(ray, (RayPayload *)payload, hit);
default:
break;
}