Files
test2/intern/cycles/kernel/device/hiprt/bvh.h
Sergey Sharybin 36559fd89f Fix #136811: HIP-RT performance regression in 4.5
Reduce the register pressure and branching in the switch() by using
subclass and cast from void* to the base class.

This ensures intersection functions are not inlined multiple times,
bringing performance back.

Alternative could be to avoid functions (they are quite large) but
that only partially resolves the performance regression.

Pull Request: https://projects.blender.org/blender/blender/pulls/136823
2025-04-01 17:59:44 +02:00

249 lines
7.7 KiB
C++

/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#pragma once
#include "kernel/device/hiprt/common.h"
CCL_NAMESPACE_BEGIN
ccl_device_inline bool scene_intersect_valid(const ccl_private Ray *ray)
{
return isfinite_safe(ray->P.x) && isfinite_safe(ray->D.x) && len_squared(ray->D) != 0.0f;
}
ccl_device_intersect bool scene_intersect(KernelGlobals kg,
const ccl_private Ray *ray,
const uint visibility,
ccl_private Intersection *isect)
{
isect->t = ray->tmax;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
isect->object = OBJECT_NONE;
isect->type = PRIMITIVE_NONE;
if (!scene_intersect_valid(ray)) {
isect->t = ray->tmax;
isect->type = PRIMITIVE_NONE;
return false;
}
if (kernel_data.device_bvh == 0) {
return false;
}
hiprtRay ray_hip;
SET_HIPRT_RAY(ray_hip, ray)
RayPayload payload;
payload.self = ray->self;
payload.kg = kg;
payload.visibility = visibility;
payload.prim_type = PRIMITIVE_NONE;
payload.ray_time = ray->time;
hiprtHit hit;
GET_TRAVERSAL_STACK()
if (visibility & PATH_RAY_SHADOW_OPAQUE) {
GET_TRAVERSAL_ANY_HIT(table_closest_intersect, 0, ray->time)
hit = traversal.getNextHit();
}
else {
GET_TRAVERSAL_CLOSEST_HIT(table_closest_intersect, 0, ray->time)
hit = traversal.getNextHit();
}
if (hit.hasHit()) {
set_intersect_point(kg, hit, isect);
if (isect->type > 1) { /* Should be applied only for curves. */
isect->type = payload.prim_type;
isect->prim = hit.primID;
}
return true;
}
return false;
}
ccl_device_intersect bool scene_intersect_shadow(KernelGlobals kg,
const ccl_private Ray *ray,
const uint visibility)
{
Intersection isect;
return scene_intersect(kg, ray, visibility, &isect);
}
#ifdef __BVH_LOCAL__
template<bool single_hit = false>
ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
const ccl_private Ray *ray,
ccl_private LocalIntersection *local_isect,
const int local_object,
ccl_private uint *lcg_state,
const int max_hits)
{
if (local_isect != nullptr) {
local_isect->num_hits = 0;
}
if (!scene_intersect_valid(ray)) {
return false;
}
const int primitive_type = kernel_data_fetch(objects, local_object).primitive_type;
if (!(primitive_type & PRIMITIVE_TRIANGLE)) {
/* Local intersection functions are only considering triangle and motion triangle primitives.
* If the local intersection is requested from other primitives (curve or point cloud) perform
* an early return to avoid tree traversal with no primitive intersection. */
return false;
}
float3 P = ray->P;
float3 dir = bvh_clamp_direction(ray->D);
float3 idir = bvh_inverse_direction(dir);
const int object_flag = kernel_data_fetch(object_flag, local_object);
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
# if BVH_FEATURE(BVH_MOTION)
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
# else
bvh_instance_push(kg, local_object, ray, &P, &dir, &idir);
# endif
}
hiprtRay ray_hip;
ray_hip.origin = P;
ray_hip.direction = dir;
ray_hip.maxT = ray->tmax;
ray_hip.minT = ray->tmin;
LocalPayload payload = {0};
payload.kg = kg;
payload.self = ray->self;
payload.ray_time = ray->time;
payload.local_object = local_object;
payload.max_hits = max_hits;
payload.lcg_state = lcg_state;
payload.local_isect = local_isect;
GET_TRAVERSAL_STACK()
void *local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object));
hiprtHit hit;
if (primitive_type == PRIMITIVE_MOTION_TRIANGLE) {
/* Motion triangle BVH uses custom primitives which requires custom traversal. */
hiprtGeomCustomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
ray_hip,
stack,
hiprtTraversalHintDefault,
&payload,
kernel_params.table_local_intersect,
2);
hit = traversal.getNextHit();
}
else {
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
ray_hip,
stack,
hiprtTraversalHintDefault,
&payload,
kernel_params.table_local_intersect,
2);
hit = traversal.getNextHit();
}
return hit.hasHit();
}
#endif /*__BVH_LOCAL__ */
#ifdef __SHADOW_RECORD_ALL__
ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
IntegratorShadowState state,
const ccl_private Ray *ray,
const uint visibility,
const uint max_hits,
ccl_private uint *num_recorded_hits,
ccl_private float *throughput)
{
*throughput = 1.0f;
*num_recorded_hits = 0;
if (!scene_intersect_valid(ray)) {
return false;
}
hiprtRay ray_hip;
SET_HIPRT_RAY(ray_hip, ray)
ShadowPayload payload;
payload.kg = kg;
payload.self = ray->self;
payload.visibility = visibility;
payload.prim_type = PRIMITIVE_NONE;
payload.ray_time = ray->time;
payload.in_state = state;
payload.max_hits = max_hits;
payload.num_hits = 0;
payload.r_num_recorded_hits = num_recorded_hits;
payload.r_throughput = throughput;
GET_TRAVERSAL_STACK()
GET_TRAVERSAL_ANY_HIT(table_shadow_intersect, 1, ray->time)
const hiprtHit hit = traversal.getNextHit();
return hit.hasHit();
}
#endif /* __SHADOW_RECORD_ALL__ */
#ifdef __VOLUME__
ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
const ccl_private Ray *ray,
ccl_private Intersection *isect,
const uint visibility)
{
isect->t = ray->tmax;
isect->u = 0.0f;
isect->v = 0.0f;
isect->prim = PRIM_NONE;
isect->object = OBJECT_NONE;
isect->type = PRIMITIVE_NONE;
if (!scene_intersect_valid(ray)) {
return false;
}
hiprtRay ray_hip;
SET_HIPRT_RAY(ray_hip, ray)
RayPayload payload;
payload.self = ray->self;
payload.kg = kg;
payload.visibility = visibility;
payload.prim_type = PRIMITIVE_NONE;
payload.ray_time = ray->time;
GET_TRAVERSAL_STACK()
GET_TRAVERSAL_CLOSEST_HIT(table_volume_intersect, 3, ray->time)
const hiprtHit hit = traversal.getNextHit();
if (hit.hasHit()) {
set_intersect_point(kg, hit, isect);
if (isect->type > 1) { /* Should be applied only for curves. */
isect->type = payload.prim_type;
isect->prim = hit.primID;
}
return true;
}
return false;
}
#endif /* __VOLUME__ */
CCL_NAMESPACE_END