Files
test/intern/cycles/kernel/device/hiprt/globals.h
Sahar A. Kashi 26ed4d3892 Cycles: Linux Support for HIP-RT
This change switches Cycles to an opensource HIP-RT library which
implements hardware ray-tracing. This library is now used on
both Windows and Linux. While there should be no noticeable changes
on Windows, on Linux this adds support for hardware ray-tracing on
AMD GPUs.

The majority of the change is typical platform code to add new
library to the dependency builder, and a change in the way how
ahead-of-time (AoT) kernels are compiled. There are changes in
Cycles itself, but they are rather straightforward: some APIs
changed in the opensource version of the library.

There are a couple of extra files which are needed for this to
work: hiprt02003_6.1_amd.hipfb and oro_compiled_kernels.hipfb.
There are some assumptions in the HIP-RT library about how they
are available. Currently they follow the same rule as AoT
kernels for oneAPI:
- On Windows they are next to blender.exe
- On Linux they are in the lib/ folder

Performance comparison on Ubuntu 22.04.5:
```
GPU: AMD Radeon PRO W7800
Driver: amdgpu-install_6.1.60103-1_all.deb
                       main         hip-rt
attic                  0.1414s      0.0932s
barbershop_interior    0.1563s      0.1258s
bistro                 0.2134s      0.1597s
bmw27                  0.0119s      0.0099s
classroom              0.1006s      0.0803s
fishy_cat              0.0248s      0.0178s
junkshop               0.0916s      0.0713s
koro                   0.0589s      0.0720s
monster                0.0435s      0.0385s
pabellon               0.0543s      0.0391s
sponza                 0.0223s      0.0180s
spring                 0.1026s      1.5145s
victor                 0.1901s      0.1239s
wdas_cloud             0.1153s      0.1125s
```

Co-authored-by: Brecht Van Lommel <brecht@blender.org>
Co-authored-by: Ray Molenkamp <github@lazydodo.com>
Co-authored-by: Sergey Sharybin <sergey@blender.org>

Pull Request: https://projects.blender.org/blender/blender/pulls/121050
2024-09-24 14:35:24 +02:00

162 lines
6.3 KiB
C

/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#pragma once
#include "kernel/types.h"
#include "kernel/integrator/state.h"
#include "kernel/util/profiling.h"
#define HIPRT_SHARED_STACK
/* The size of global stack available to each thread (memory reserved for each thread in
* global_stack_buffer). */
#define HIPRT_THREAD_STACK_SIZE 64
/* LDS (Local Data Storage) allocation for each thread, the number is obtained empirically. */
#define HIPRT_SHARED_STACK_SIZE 24
/* HIPRT_THREAD_GROUP_SIZE is the number of threads per work group for intersection kernels
* The default number of threads per work-group is 1024, however, since HIP RT intersection kernels
* use local memory, and the local memory size in those kernels scales up with the number of
* threads, the number of threads to is scaled down to 256 to avoid going over maximum local memory
* and to strike a balance between memory access and the number of waves.
*
* Total local stack size would be number of threads * HIPRT_SHARED_STACK_SIZE. */
#define HIPRT_THREAD_GROUP_SIZE 256
CCL_NAMESPACE_BEGIN
struct KernelGlobalsGPU {
hiprtGlobalStackBuffer global_stack_buffer;
#ifdef HIPRT_SHARED_STACK
hiprtSharedStackBuffer shared_stack;
#endif
};
typedef ccl_global KernelGlobalsGPU *ccl_restrict KernelGlobals;
#if defined(HIPRT_SHARED_STACK)
/* This macro allocates shared memory and to pass the shared memory down to intersection functions
* KernelGlobals is used. */
# define HIPRT_INIT_KERNEL_GLOBAL() \
ccl_gpu_shared int shared_stack[HIPRT_SHARED_STACK_SIZE * HIPRT_THREAD_GROUP_SIZE]; \
ccl_global KernelGlobalsGPU kg_gpu; \
KernelGlobals kg = &kg_gpu; \
kg->shared_stack.stackData = &shared_stack[0]; \
kg->shared_stack.stackSize = HIPRT_SHARED_STACK_SIZE; \
kg->global_stack_buffer = stack_buffer;
#else
# define HIPRT_INIT_KERNEL_GLOBAL() \
KernelGlobals kg = NULL; \
kg->global_stack_buffer = stack_buffer;
#endif
struct KernelParamsHIPRT {
KernelData data;
#define KERNEL_DATA_ARRAY(type, name) const type *name;
KERNEL_DATA_ARRAY(int, user_instance_id)
KERNEL_DATA_ARRAY(uint64_t, blas_ptr)
KERNEL_DATA_ARRAY(int2, custom_prim_info)
KERNEL_DATA_ARRAY(int2, custom_prim_info_offset)
KERNEL_DATA_ARRAY(float2, prims_time)
KERNEL_DATA_ARRAY(int, prim_time_offset)
#include "kernel/data_arrays.h"
/* Integrator state */
IntegratorStateGPU integrator_state;
hiprtFuncTable table_closest_intersect;
hiprtFuncTable table_shadow_intersect;
hiprtFuncTable table_local_intersect;
hiprtFuncTable table_volume_intersect;
};
/* Intersection_Function_Table_Index defines index values to retrieve custom intersection
* functions from function table. */
enum Intersection_Function_Table_Index {
// Triangles use the intersection function provided by HIP RT and don't need custom intersection
// functions
// Custom intersection functions for closest intersect.
Curve_Intersect_Function = 1, // Custom intersection for curves
Motion_Triangle_Intersect_Function, // Custom intersection for triangles with vertex motion blur
// attributes.
Point_Intersect_Function, // Custom intersection for point cloud.
// Custom intersection functions for shadow rendering are the same as the function for closest
// intersect.
// However, the table indices are different
Triangle_Intersect_Shadow_None,
Curve_Intersect_Shadow,
Motion_Triangle_Intersect_Shadow,
Point_Intersect_Shadow,
// Custom intersection functions for subsurface scattering.
// Only motion triangles have valid custom intersection function
Triangle_Intersect_Local_None,
Curve_Intersect_Local_None,
Motion_Triangle_Intersect_Local,
Point_Intersect_Local_None,
// Custom intersection functions for volume rendering.
// Only motion triangles have valid custom intersection function
Triangle_Intersect_Volume_None,
Curve_Intersect_Volume_None,
Motion_Triangle_Intersect_Volume,
Point_Intersect_Volume_None,
};
// Filter functions, filter hits, i.e. test whether a hit should be accepted or not, and whether
// traversal should stop or continue.
enum Filter_Function_Table_Index {
Triangle_Filter_Closest = 0, // Filter function for triangles for closest intersect, no custom
// intersection function is needed.
Curve_Filter_Opaque_None, // No filter function is needed and everything is handled in the
// intersection function.
Motion_Triangle_Filter_Opaque_None, // No filter function is needed and everything is handled in
// intersection function.
Point_Filter_Opaque_Non, // No filter function is needed.
// Filter function for all primitives for shadow intersection.
// All primitives use the same function but each has a different index in the table.
Triangle_Filter_Shadow,
Curve_Filter_Shadow,
Motion_Triangle_Filter_Shadow,
Point_Filter_Shadow,
// Filter functions for subsurface scattering. Triangles and motion triangles need function
// assignment. They indices for triangles and motion triangles point to the same function. Points
// and curves dont need any function since subsurface scattering is not applied on either.
Triangle_Filter_Local, // Filter functions for triangles
Curve_Filter_Local_None, // Subsurface scattering is not applied on curves, no filter function
// is
// needed.
Motion_Triangle_Filter_Local,
Point_Filter_Local_None,
// Filter functions for volume rendering.
// Volume rendering only applies to triangles and motion triangles.
// Triangles and motion triangles use the same filter functions for volume rendering
Triangle_Filter_Volume,
Curve_Filter_Volume_None,
Motion_Triangle_Filter_Volume,
Point_Filter_Volume_None,
};
#ifdef __KERNEL_GPU__
__constant__ KernelParamsHIPRT kernel_params;
# ifdef HIPRT_SHARED_STACK
typedef hiprtGlobalStack Stack;
typedef hiprtEmptyInstanceStack Instance_Stack;
# endif
#endif
/* Abstraction macros */
#define kernel_data kernel_params.data
#define kernel_data_fetch(name, index) kernel_params.name[(index)]
#define kernel_data_array(name) (kernel_params.name)
#define kernel_integrator_state kernel_params.integrator_state
CCL_NAMESPACE_END