Files
test2/intern/cycles/kernel/device/hiprt/globals.h
Sahar A. Kashi 9ad3b74867 Fix: SSS and Motion Blur or Curves not working on HIP-RT
This change fixes the remaining failing SSS tests when using HIP-RT.
This includes a crash when SSS is used on curves, and objects with both
motion blur and SSS rendering black.

The root cause in both cases was that traversal always assumed a
regular BVH (built for triangles), while curves and motion triangles
use custom primitives, which require specialized BVH traversal.

This change includes:

- Early out from `scene_intersect_local()` for non-triangle and
  non-motion-triangle primitives. This fixes the `sss_hair.blend` test,
  and also avoids unnecessary BVH traversal when the local intersection
  is requested from a curve object. The same early out could be added
  to the other BVH traversal implementations.

- Use `hiprtGeomCustomTraversalAnyHitCustomStack` for motion triangle
  primitives. This fixes objects with motion blur and SSS rendering black.

Fixes #135856

Co-authored-by: Sahar A. Kashi <sahar.alipourkashi@amd.com>
Co-authored-by: Sergey Sharybin <sergey@blender.org>

Pull Request: https://projects.blender.org/blender/blender/pulls/135943
2025-03-14 18:17:54 +01:00

152 lines
6.1 KiB
C

/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
/* Constant Globals */
#pragma once
#include "kernel/types.h"
#include "kernel/integrator/state.h"
#include "kernel/util/profiler.h" // IWYU pragma: export
#include "util/color.h" // IWYU pragma: export
#include "util/texture.h" // IWYU pragma: export
/* Size of the global stack available to each thread, i.e. the amount of memory reserved for each
 * thread in `global_stack_buffer`. */
#define HIPRT_THREAD_STACK_SIZE 64
/* LDS (Local Data Storage) allocation for each thread. The value was obtained empirically. */
#define HIPRT_SHARED_STACK_SIZE 24
/* Number of threads per work-group for intersection kernels.
 *
 * The default number of threads per work-group is 1024. However, HIP RT intersection kernels use
 * local memory, and the local memory size in those kernels scales up with the number of threads.
 * The number of threads is therefore scaled down to 256, to avoid going over the maximum local
 * memory and to strike a balance between memory access and the number of waves.
 *
 * The total local stack size is the number of threads * HIPRT_SHARED_STACK_SIZE, which is the
 * element count of the `shared_stack` array declared by HIPRT_INIT_KERNEL_GLOBAL(). */
#define HIPRT_THREAD_GROUP_SIZE 256
CCL_NAMESPACE_BEGIN
/* Stack buffers that are set up by HIPRT_INIT_KERNEL_GLOBAL() and carried around through
 * `KernelGlobals`. */
struct KernelGlobalsGPU {
/* Global stack buffer; HIPRT_INIT_KERNEL_GLOBAL() assigns it from `stack_buffer`. */
hiprtGlobalStackBuffer global_stack_buffer;
/* Shared stack; HIPRT_INIT_KERNEL_GLOBAL() points it at the `shared_stack` array it declares. */
hiprtSharedStackBuffer shared_stack;
};
/* Pointer type through which kernel code refers to the globals. */
using KernelGlobals = ccl_global KernelGlobalsGPU *ccl_restrict;
/* Allocate the shared-memory stack and set up a `KernelGlobalsGPU`, exposed as `kg`, so that the
 * shared memory can be passed down to the intersection functions through `KernelGlobals`.
 *
 * The expansion declares `shared_stack`, `kg_gpu` and `kg` in the enclosing scope, so it can be
 * used only once per scope. It also reads a variable named `stack_buffer`, which must already be
 * visible at the point where the macro is used.
 *
 * `shared_stack` holds HIPRT_SHARED_STACK_SIZE entries for each of the HIPRT_THREAD_GROUP_SIZE
 * threads, while `stackSize` is set to the per-thread entry count. */
#define HIPRT_INIT_KERNEL_GLOBAL() \
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
ccl_global KernelGlobalsGPU kg_gpu; \
KernelGlobals kg = &kg_gpu; \
kg->shared_stack.stackData = &shared_stack[0]; \
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
kg->global_stack_buffer = stack_buffer;
/* Kernel parameters for the HIP RT device: the kernel data, pointers to the data arrays, the
 * integrator state and one function table per intersection type. */
struct KernelParamsHIPRT {
KernelData data;
/* Every data array is declared as a pointer-to-const member named after the array. */
#define KERNEL_DATA_ARRAY(type, name) const type *name;
/* Arrays declared explicitly here, ahead of the ones coming from `kernel/data_arrays.h`. */
KERNEL_DATA_ARRAY(int, user_instance_id)
KERNEL_DATA_ARRAY(uint64_t, blas_ptr)
KERNEL_DATA_ARRAY(int2, custom_prim_info)
KERNEL_DATA_ARRAY(int2, custom_prim_info_offset)
KERNEL_DATA_ARRAY(float2, prims_time)
KERNEL_DATA_ARRAY(int, prim_time_offset)
/* NOTE(review): presumably declares the remaining arrays through KERNEL_DATA_ARRAY and undefines
 * the macro afterwards -- confirm against the header. */
#include "kernel/data_arrays.h"
/* Integrator state */
IntegratorStateGPU integrator_state;
/* Function tables, one per intersection type: closest, shadow, local and volume. */
hiprtFuncTable table_closest_intersect;
hiprtFuncTable table_shadow_intersect;
hiprtFuncTable table_local_intersect;
hiprtFuncTable table_volume_intersect;
};
/* Index values used to retrieve custom intersection functions from the function table.
 *
 * Entries are grouped by intersection type (closest, shadow, local, volume). Names ending in
 * `_None` are slots for which no valid custom intersection function exists. */
enum Intersection_Function_Table_Index {
  /* Closest intersection.
   * Triangles use the intersection function provided by HIP RT and don't need a custom
   * intersection function, so the first named entry is at index 1. */

  /* Custom intersection for curves. */
  Curve_Intersect_Function = 1,
  /* Custom intersection for triangles with vertex motion blur attributes. */
  Motion_Triangle_Intersect_Function = 2,
  /* Custom intersection for point clouds. */
  Point_Intersect_Function = 3,

  /* Shadow intersection.
   * The custom intersection functions for shadow rendering are the same as the functions for
   * closest intersection, but the table indices are different. */
  Triangle_Intersect_Shadow_None = 4,
  Curve_Intersect_Shadow = 5,
  Motion_Triangle_Intersect_Shadow = 6,
  Point_Intersect_Shadow = 7,

  /* Subsurface scattering (local intersection).
   * Only motion triangles have a valid custom intersection function. */
  Triangle_Intersect_Local_None = 8,
  Curve_Intersect_Local_None = 9,
  Motion_Triangle_Intersect_Local = 10,
  Point_Intersect_Local_None = 11,

  /* Volume rendering.
   * Only motion triangles have a valid custom intersection function. */
  Triangle_Intersect_Volume_None = 12,
  Curve_Intersect_Volume_None = 13,
  Motion_Triangle_Intersect_Volume = 14,
  Point_Intersect_Volume_None = 15,
};
/* Index values of the filter functions in the function table.
 *
 * Filter functions filter hits, i.e. they test whether a hit should be accepted or not, and
 * whether traversal should stop or continue. Entries are grouped by intersection type. */
enum Filter_Function_Table_Index {
  /* Closest intersection. */

  /* Filter function for triangles; no custom intersection function is needed. */
  Triangle_Filter_Closest = 0,
  /* No filter function is needed, everything is handled in the intersection function. */
  Curve_Filter_Opaque_None = 1,
  /* No filter function is needed, everything is handled in the intersection function. */
  Motion_Triangle_Filter_Opaque_None = 2,
  /* No filter function is needed.
   * NOTE(review): the name ends in `_Non` rather than `_None` like its siblings; kept as-is
   * because the identifier may be referenced elsewhere. */
  Point_Filter_Opaque_Non = 3,

  /* Shadow intersection.
   * All primitives use the same filter function, but each has a different index in the table. */
  Triangle_Filter_Shadow = 4,
  Curve_Filter_Shadow = 5,
  Motion_Triangle_Filter_Shadow = 6,
  Point_Filter_Shadow = 7,

  /* Subsurface scattering (local intersection).
   * Triangles and motion triangles need a function assignment, and their indices point to the
   * same function. Points and curves don't need any function, since subsurface scattering is
   * not applied to either. */
  Triangle_Filter_Local = 8,
  Curve_Filter_Local_None = 9,
  Motion_Triangle_Filter_Local = 10,
  Point_Filter_Local_None = 11,

  /* Volume rendering.
   * Volume rendering only applies to triangles and motion triangles, which use the same filter
   * function. */
  Triangle_Filter_Volume = 12,
  Curve_Filter_Volume_None = 13,
  Motion_Triangle_Filter_Volume = 14,
  Point_Filter_Volume_None = 15,
};
#ifdef __KERNEL_GPU__
/* Kernel parameters, declared in constant memory. The abstraction macros in this header
 * (`kernel_data`, `kernel_data_fetch`, ...) expand to accesses on this object. */
__constant__ KernelParamsHIPRT kernel_params;
/* Short aliases for the HIP RT stack types. */
using Stack = hiprtGlobalStack;
using Instance_Stack = hiprtEmptyInstanceStack;
#endif
/* Abstraction macros: accessors that expand to members of `kernel_params`. */
/* The kernel data, `kernel_params.data`. */
#define kernel_data kernel_params.data
/* Element `index` of the data array `name`. */
#define kernel_data_fetch(name, index) kernel_params.name[(index)]
/* The data array `name` itself (the pointer member of `kernel_params`). */
#define kernel_data_array(name) (kernel_params.name)
/* The integrator state, `kernel_params.integrator_state`. */
#define kernel_integrator_state kernel_params.integrator_state
CCL_NAMESPACE_END