Fix: SSS and Motion Blur or Curves not working on HIP-RT
This change fixes the remaining failing tests with SSS when using HIP-RT. This includes a crash when SSS is used on curves, and objects with motion blur and SSS rendering black. The root cause in both cases was that traversal always assumed a regular BVH (built for triangles), while curves and motion triangles use custom primitives, which require specialized BVH traversal. This change includes: - Early return from `scene_intersect_local()` for non-triangle and non-motion-triangle primitives. This fixes the `sss_hair.blend` test, and also avoids unnecessary BVH traversal when the local intersection is requested from a curve object. The same early return could be added to the other BVH traversal implementations. - Use `hiprtGeomCustomTraversalAnyHitCustomStack` for motion triangle primitives. This fixes objects with motion blur and SSS rendering black. Fixes #135856 Co-authored-by: Sahar A. Kashi <sahar.alipourkashi@amd.com> Co-authored-by: Sergey Sharybin <sergey@blender.org> Pull Request: https://projects.blender.org/blender/blender/pulls/135943
This commit is contained in:
committed by
Sergey Sharybin
parent
8a61555a46
commit
9ad3b74867
@@ -84,10 +84,19 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
ccl_private uint *lcg_state,
|
||||
const int max_hits)
|
||||
{
|
||||
if (local_isect != nullptr) {
|
||||
local_isect->num_hits = 0;
|
||||
}
|
||||
|
||||
if (!scene_intersect_valid(ray)) {
|
||||
if (local_isect) {
|
||||
local_isect->num_hits = 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
const int primitive_type = kernel_data_fetch(objects, local_object).primitive_type;
|
||||
if (!(primitive_type & PRIMITIVE_TRIANGLE)) {
|
||||
/* Local intersection functions only consider triangle and motion triangle primitives.
|
||||
* If the local intersection is requested from other primitives (curve or point cloud) perform
|
||||
* an early return to avoid tree traversal with no primitive intersection. */
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -95,13 +104,8 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
float3 dir = bvh_clamp_direction(ray->D);
|
||||
float3 idir = bvh_inverse_direction(dir);
|
||||
|
||||
if (local_isect != nullptr) {
|
||||
local_isect->num_hits = 0;
|
||||
}
|
||||
|
||||
const int object_flag = kernel_data_fetch(object_flag, local_object);
|
||||
if (!(object_flag & SD_OBJECT_TRANSFORM_APPLIED)) {
|
||||
|
||||
# if BVH_FEATURE(BVH_MOTION)
|
||||
bvh_instance_motion_push(kg, local_object, ray, &P, &dir, &idir);
|
||||
# else
|
||||
@@ -127,20 +131,30 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
GET_TRAVERSAL_STACK()
|
||||
|
||||
void *local_geom = (void *)(kernel_data_fetch(blas_ptr, local_object));
|
||||
// we don't need custom intersection functions for SSR
|
||||
# ifdef HIPRT_SHARED_STACK
|
||||
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
|
||||
ray_hip,
|
||||
stack,
|
||||
hiprtTraversalHintDefault,
|
||||
&payload,
|
||||
kernel_params.table_local_intersect,
|
||||
2);
|
||||
# else
|
||||
hiprtGeomTraversalAnyHit traversal(
|
||||
local_geom, ray_hip, table, hiprtTraversalHintDefault, &payload);
|
||||
# endif
|
||||
hiprtHit hit = traversal.getNextHit();
|
||||
|
||||
hiprtHit hit;
|
||||
if (primitive_type == PRIMITIVE_MOTION_TRIANGLE) {
|
||||
/* Motion triangle BVH uses custom primitives which requires custom traversal. */
|
||||
hiprtGeomCustomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
|
||||
ray_hip,
|
||||
stack,
|
||||
hiprtTraversalHintDefault,
|
||||
&payload,
|
||||
kernel_params.table_local_intersect,
|
||||
2);
|
||||
hit = traversal.getNextHit();
|
||||
}
|
||||
else {
|
||||
hiprtGeomTraversalAnyHitCustomStack<Stack> traversal((hiprtGeometry)local_geom,
|
||||
ray_hip,
|
||||
stack,
|
||||
hiprtTraversalHintDefault,
|
||||
&payload,
|
||||
kernel_params.table_local_intersect,
|
||||
2);
|
||||
hit = traversal.getNextHit();
|
||||
}
|
||||
|
||||
return hit.hasHit();
|
||||
}
|
||||
#endif //__BVH_LOCAL__
|
||||
|
||||
@@ -28,7 +28,6 @@ struct ShadowPayload {
|
||||
struct LocalPayload {
|
||||
KernelGlobals kg;
|
||||
RaySelfPrimitives self;
|
||||
int prim_type;
|
||||
float ray_time;
|
||||
int local_object;
|
||||
uint max_hits;
|
||||
@@ -42,56 +41,35 @@ struct LocalPayload {
|
||||
RAY_RT.maxT = RAY->tmax; \
|
||||
RAY_RT.minT = RAY->tmin;
|
||||
|
||||
# if defined(HIPRT_SHARED_STACK)
|
||||
# define GET_TRAVERSAL_STACK() \
|
||||
Stack stack(kg->global_stack_buffer, kg->shared_stack); \
|
||||
Instance_Stack instance_stack;
|
||||
# else
|
||||
# define GET_TRAVERSAL_STACK()
|
||||
# endif
|
||||
# define GET_TRAVERSAL_STACK() \
|
||||
Stack stack(kg->global_stack_buffer, kg->shared_stack); \
|
||||
Instance_Stack instance_stack;
|
||||
|
||||
# ifdef HIPRT_SHARED_STACK
|
||||
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
|
||||
hiprtSceneTraversalAnyHitCustomStack<Stack, Instance_Stack> traversal( \
|
||||
(hiprtScene)kernel_data.device_bvh, \
|
||||
ray_hip, \
|
||||
stack, \
|
||||
instance_stack, \
|
||||
visibility, \
|
||||
hiprtTraversalHintDefault, \
|
||||
&payload, \
|
||||
kernel_params.FUNCTION_TABLE, \
|
||||
RAY_TYPE, \
|
||||
RAY_TIME);
|
||||
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
|
||||
hiprtSceneTraversalAnyHitCustomStack<Stack, Instance_Stack> traversal( \
|
||||
(hiprtScene)kernel_data.device_bvh, \
|
||||
ray_hip, \
|
||||
stack, \
|
||||
instance_stack, \
|
||||
visibility, \
|
||||
hiprtTraversalHintDefault, \
|
||||
&payload, \
|
||||
kernel_params.FUNCTION_TABLE, \
|
||||
RAY_TYPE, \
|
||||
RAY_TIME);
|
||||
|
||||
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
|
||||
hiprtSceneTraversalClosestCustomStack<Stack, Instance_Stack> traversal( \
|
||||
(hiprtScene)kernel_data.device_bvh, \
|
||||
ray_hip, \
|
||||
stack, \
|
||||
instance_stack, \
|
||||
visibility, \
|
||||
hiprtTraversalHintDefault, \
|
||||
&payload, \
|
||||
kernel_params.FUNCTION_TABLE, \
|
||||
RAY_TYPE, \
|
||||
RAY_TIME);
|
||||
# else
|
||||
# define GET_TRAVERSAL_ANY_HIT(FUNCTION_TABLE) \
|
||||
hiprtSceneTraversalAnyHit traversal(kernel_data.device_bvh, \
|
||||
ray_hip, \
|
||||
visibility, \
|
||||
FUNCTION_TABLE, \
|
||||
hiprtTraversalHintDefault, \
|
||||
&payload);
|
||||
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE) \
|
||||
hiprtSceneTraversalClosest traversal(kernel_data.device_bvh, \
|
||||
ray_hip, \
|
||||
visibility, \
|
||||
FUNCTION_TABLE, \
|
||||
hiprtTraversalHintDefault, \
|
||||
&payload);
|
||||
# endif
|
||||
# define GET_TRAVERSAL_CLOSEST_HIT(FUNCTION_TABLE, RAY_TYPE, RAY_TIME) \
|
||||
hiprtSceneTraversalClosestCustomStack<Stack, Instance_Stack> traversal( \
|
||||
(hiprtScene)kernel_data.device_bvh, \
|
||||
ray_hip, \
|
||||
stack, \
|
||||
instance_stack, \
|
||||
visibility, \
|
||||
hiprtTraversalHintDefault, \
|
||||
&payload, \
|
||||
kernel_params.FUNCTION_TABLE, \
|
||||
RAY_TYPE, \
|
||||
RAY_TIME);
|
||||
|
||||
ccl_device_inline void set_intersect_point(KernelGlobals kg,
|
||||
hiprtHit &hit,
|
||||
@@ -243,23 +221,19 @@ ccl_device_inline bool motion_triangle_custom_local_intersect(const hiprtRay &ra
|
||||
|
||||
LocalIntersection *local_isect = local_payload->local_isect;
|
||||
|
||||
bool b_hit = motion_triangle_intersect_local(kg,
|
||||
local_isect,
|
||||
ray.origin,
|
||||
ray.direction,
|
||||
local_payload->ray_time,
|
||||
object_id,
|
||||
prim_id_global,
|
||||
prim_id_local,
|
||||
ray.minT,
|
||||
ray.maxT,
|
||||
local_payload->lcg_state,
|
||||
local_payload->max_hits);
|
||||
return motion_triangle_intersect_local(kg,
|
||||
local_isect,
|
||||
ray.origin,
|
||||
ray.direction,
|
||||
local_payload->ray_time,
|
||||
object_id,
|
||||
prim_id_global,
|
||||
prim_id_local,
|
||||
ray.minT,
|
||||
ray.maxT,
|
||||
local_payload->lcg_state,
|
||||
local_payload->max_hits);
|
||||
|
||||
if (b_hit) {
|
||||
local_payload->prim_type = PRIMITIVE_MOTION_TRIANGLE;
|
||||
}
|
||||
return b_hit;
|
||||
# else
|
||||
return false;
|
||||
# endif
|
||||
@@ -585,18 +559,33 @@ ccl_device_inline bool local_intersection_filter(const hiprtRay &ray,
|
||||
# ifdef __BVH_LOCAL__
|
||||
LocalPayload *payload = (LocalPayload *)user_data;
|
||||
KernelGlobals kg = payload->kg;
|
||||
int object_id = payload->local_object;
|
||||
int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
|
||||
int prim = hit.primID + prim_offset;
|
||||
const int object_id = payload->local_object;
|
||||
const uint max_hits = payload->max_hits;
|
||||
|
||||
/* Triangle primitive uses hardware intersection, other primitives do custom intersection
|
||||
* which does reservoir sampling for intersections. For the custom primitives only check
|
||||
* whether we can stop traversal early on. The rest of the checks here are only done for the
|
||||
* regular triangles. */
|
||||
const int primitive_type = kernel_data_fetch(objects, object_id).primitive_type;
|
||||
if (primitive_type != PRIMITIVE_TRIANGLE) {
|
||||
if (max_hits == 0) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
const int prim_offset = kernel_data_fetch(object_prim_offset, object_id);
|
||||
const int prim = hit.primID + prim_offset;
|
||||
# ifndef __RAY_OFFSET__
|
||||
if (intersection_skip_self_local(payload->self, prim)) {
|
||||
return true; // continue search
|
||||
}
|
||||
# endif
|
||||
uint max_hits = payload->max_hits;
|
||||
|
||||
if (max_hits == 0) {
|
||||
return false; // stop search
|
||||
}
|
||||
|
||||
int hit_index = 0;
|
||||
if (payload->lcg_state) {
|
||||
for (int i = min(max_hits, payload->local_isect->num_hits) - 1; i >= 0; --i) {
|
||||
@@ -618,19 +607,20 @@ ccl_device_inline bool local_intersection_filter(const hiprtRay &ray,
|
||||
}
|
||||
payload->local_isect->num_hits = 1;
|
||||
}
|
||||
|
||||
Intersection *isect = &payload->local_isect->hits[hit_index];
|
||||
isect->t = hit.t;
|
||||
isect->prim = prim;
|
||||
isect->object = object_id;
|
||||
isect->type = PRIMITIVE_TRIANGLE; // kernel_data_fetch(__objects, object_id).primitive_type;
|
||||
|
||||
isect->u = hit.uv.x;
|
||||
isect->v = hit.uv.y;
|
||||
isect->prim = prim;
|
||||
isect->object = object_id;
|
||||
isect->type = primitive_type;
|
||||
|
||||
payload->local_isect->Ng[hit_index] = hit.normal;
|
||||
|
||||
return true;
|
||||
|
||||
# else
|
||||
return false;
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
@@ -14,8 +14,6 @@
|
||||
#include "util/color.h" // IWYU pragma: export
|
||||
#include "util/texture.h" // IWYU pragma: export
|
||||
|
||||
#define HIPRT_SHARED_STACK
|
||||
|
||||
/* The size of global stack available to each thread (memory reserved for each thread in
|
||||
* global_stack_buffer). */
|
||||
#define HIPRT_THREAD_STACK_SIZE 64
|
||||
@@ -36,29 +34,20 @@ CCL_NAMESPACE_BEGIN
|
||||
|
||||
struct KernelGlobalsGPU {
|
||||
hiprtGlobalStackBuffer global_stack_buffer;
|
||||
#ifdef HIPRT_SHARED_STACK
|
||||
hiprtSharedStackBuffer shared_stack;
|
||||
#endif
|
||||
};
|
||||
|
||||
using KernelGlobals = ccl_global KernelGlobalsGPU *ccl_restrict;
|
||||
|
||||
#if defined(HIPRT_SHARED_STACK)
|
||||
|
||||
/* This macro allocates shared memory. KernelGlobals is used to pass the shared memory down to
|
||||
* the intersection functions. */
|
||||
# define HIPRT_INIT_KERNEL_GLOBAL() \
|
||||
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
|
||||
ccl_global KernelGlobalsGPU kg_gpu; \
|
||||
KernelGlobals kg = &kg_gpu; \
|
||||
kg->shared_stack.stackData = &shared_stack[0]; \
|
||||
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
|
||||
kg->global_stack_buffer = stack_buffer;
|
||||
#else
|
||||
# define HIPRT_INIT_KERNEL_GLOBAL() \
|
||||
KernelGlobals kg = nullptr; \
|
||||
kg->global_stack_buffer = stack_buffer;
|
||||
#endif
|
||||
#define HIPRT_INIT_KERNEL_GLOBAL() \
|
||||
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
|
||||
ccl_global KernelGlobalsGPU kg_gpu; \
|
||||
KernelGlobals kg = &kg_gpu; \
|
||||
kg->shared_stack.stackData = &shared_stack[0]; \
|
||||
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
|
||||
kg->global_stack_buffer = stack_buffer;
|
||||
|
||||
struct KernelParamsHIPRT {
|
||||
KernelData data;
|
||||
@@ -149,11 +138,8 @@ enum Filter_Function_Table_Index {
|
||||
#ifdef __KERNEL_GPU__
|
||||
__constant__ KernelParamsHIPRT kernel_params;
|
||||
|
||||
# ifdef HIPRT_SHARED_STACK
|
||||
typedef hiprtGlobalStack Stack;
|
||||
typedef hiprtEmptyInstanceStack Instance_Stack;
|
||||
# endif
|
||||
|
||||
#endif
|
||||
|
||||
/* Abstraction macros */
|
||||
|
||||
Reference in New Issue
Block a user