Files
test2/intern/cycles/kernel/device/hiprt/hiprt_kernels.h
Sahar A. Kashi 26ed4d3892 Cycles: Linux Support for HIP-RT
This change switches Cycles to an open-source HIP-RT library that
implements hardware ray-tracing. This library is now used on
both Windows and Linux. While there should be no noticeable changes
on Windows, on Linux this adds support for hardware ray-tracing on
AMD GPUs.

The majority of the change is typical platform code to add a new
library to the dependency builder, and a change in how
ahead-of-time (AoT) kernels are compiled. There are changes in
Cycles itself, but they are rather straightforward: some APIs
changed in the open-source version of the library.

There are a couple of extra files which are needed for this to
work: hiprt02003_6.1_amd.hipfb and oro_compiled_kernels.hipfb.
There are some assumptions in the HIP-RT library about how they
are available. Currently they follow the same rule as AoT
kernels for oneAPI:
- On Windows they are next to blender.exe
- On Linux they are in the lib/ folder

Performance comparison on Ubuntu 22.04.5:
```
GPU: AMD Radeon PRO W7800
Driver: amdgpu-install_6.1.60103-1_all.deb
                       main         hip-rt
attic                  0.1414s      0.0932s
barbershop_interior    0.1563s      0.1258s
bistro                 0.2134s      0.1597s
bmw27                  0.0119s      0.0099s
classroom              0.1006s      0.0803s
fishy_cat              0.0248s      0.0178s
junkshop               0.0916s      0.0713s
koro                   0.0589s      0.0720s
monster                0.0435s      0.0385s
pabellon               0.0543s      0.0391s
sponza                 0.0223s      0.0180s
spring                 0.1026s      1.5145s
victor                 0.1901s      0.1239s
wdas_cloud             0.1153s      0.1125s
```

Co-authored-by: Brecht Van Lommel <brecht@blender.org>
Co-authored-by: Ray Molenkamp <github@lazydodo.com>
Co-authored-by: Sergey Sharybin <sergey@blender.org>

Pull Request: https://projects.blender.org/blender/blender/pulls/121050
2024-09-24 14:35:24 +02:00

119 lines
4.6 KiB
C

/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
*
* SPDX-License-Identifier: Apache-2.0 */
#ifdef __HIPRT__
/* HIP-RT launch entry for integrator_intersect_closest.
 *
 * A thread whose ccl_gpu_global_id_x() value is below work_size selects a state index --
 * read from path_index_array when that pointer is non-null, otherwise the thread's own
 * index -- and calls integrator_intersect_closest(kg, state, render_buffer). Threads at
 * or beyond work_size do nothing.
 *
 * NOTE(review): kg is not declared in this body and stack_buffer is never referenced
 * directly; both are presumably handled by HIPRT_INIT_KERNEL_GLOBAL() -- confirm against
 * that macro's definition. */
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
    ccl_gpu_kernel_signature(integrator_intersect_closest,
                             ccl_global const int *path_index_array,
                             ccl_global float *render_buffer,
                             const int work_size,
                             ccl_global hiprtGlobalStackBuffer stack_buffer)
{
  const int global_index = ccl_gpu_global_id_x();

  /* Threads outside the requested work size have no work item. */
  if (global_index >= work_size) {
    return;
  }

  HIPRT_INIT_KERNEL_GLOBAL()

  /* Without an index array the thread index doubles as the state index. */
  int state = global_index;
  if (path_index_array) {
    state = path_index_array[global_index];
  }

  ccl_gpu_kernel_call(integrator_intersect_closest(kg, state, render_buffer));
}
ccl_gpu_kernel_postfix
/* HIP-RT launch entry for integrator_intersect_shadow.
 *
 * A thread whose ccl_gpu_global_id_x() value is below work_size selects a state index --
 * read from path_index_array when that pointer is non-null, otherwise the thread's own
 * index -- and calls integrator_intersect_shadow(kg, state). Threads at or beyond
 * work_size do nothing.
 *
 * NOTE(review): kg is not declared in this body and stack_buffer is never referenced
 * directly; both are presumably handled by HIPRT_INIT_KERNEL_GLOBAL() -- confirm against
 * that macro's definition. */
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
    ccl_gpu_kernel_signature(integrator_intersect_shadow,
                             ccl_global const int *path_index_array,
                             const int work_size,
                             ccl_global hiprtGlobalStackBuffer stack_buffer)
{
  const int global_index = ccl_gpu_global_id_x();

  /* Threads outside the requested work size have no work item. */
  if (global_index >= work_size) {
    return;
  }

  HIPRT_INIT_KERNEL_GLOBAL()

  /* Without an index array the thread index doubles as the state index. */
  int state = global_index;
  if (path_index_array) {
    state = path_index_array[global_index];
  }

  ccl_gpu_kernel_call(integrator_intersect_shadow(kg, state));
}
ccl_gpu_kernel_postfix
/* HIP-RT launch entry for integrator_intersect_subsurface.
 *
 * A thread whose ccl_gpu_global_id_x() value is below work_size selects a state index --
 * read from path_index_array when that pointer is non-null, otherwise the thread's own
 * index -- and calls integrator_intersect_subsurface(kg, state). Threads at or beyond
 * work_size do nothing.
 *
 * NOTE(review): kg is not declared in this body and stack_buffer is never referenced
 * directly; both are presumably handled by HIPRT_INIT_KERNEL_GLOBAL() -- confirm against
 * that macro's definition. */
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
    ccl_gpu_kernel_signature(integrator_intersect_subsurface,
                             ccl_global const int *path_index_array,
                             const int work_size,
                             ccl_global hiprtGlobalStackBuffer stack_buffer)
{
  const int global_index = ccl_gpu_global_id_x();

  /* Threads outside the requested work size have no work item. */
  if (global_index >= work_size) {
    return;
  }

  HIPRT_INIT_KERNEL_GLOBAL()

  /* Without an index array the thread index doubles as the state index. */
  int state = global_index;
  if (path_index_array) {
    state = path_index_array[global_index];
  }

  ccl_gpu_kernel_call(integrator_intersect_subsurface(kg, state));
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_volume_stack,
ccl_global const int *path_index_array,
const int work_size,
ccl_global hiprtGlobalStackBuffer stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_volume_stack(kg, state));
}
}
/* HIP-RT launch entry for integrator_intersect_dedicated_light.
 *
 * A thread whose ccl_gpu_global_id_x() value is below work_size selects a state index --
 * read from path_index_array when that pointer is non-null, otherwise the thread's own
 * index -- and calls integrator_intersect_dedicated_light(kg, state). Threads at or
 * beyond work_size do nothing.
 *
 * NOTE(review): kg is not declared in this body and stack_buffer is never referenced
 * directly; both are presumably handled by HIPRT_INIT_KERNEL_GLOBAL() -- confirm against
 * that macro's definition. */
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
    ccl_gpu_kernel_signature(integrator_intersect_dedicated_light,
                             ccl_global const int *path_index_array,
                             const int work_size,
                             ccl_global hiprtGlobalStackBuffer stack_buffer)
{
  const int global_index = ccl_gpu_global_id_x();

  /* Threads outside the requested work size have no work item. */
  if (global_index >= work_size) {
    return;
  }

  HIPRT_INIT_KERNEL_GLOBAL()

  /* Without an index array the thread index doubles as the state index. */
  int state = global_index;
  if (path_index_array) {
    state = path_index_array[global_index];
  }

  ccl_gpu_kernel_call(integrator_intersect_dedicated_light(kg, state));
}
ccl_gpu_kernel_postfix
/* HIP-RT launch entry for integrator_shade_surface_raytrace.
 *
 * A thread whose ccl_gpu_global_id_x() value is below work_size selects a state index --
 * read from path_index_array when that pointer is non-null, otherwise the thread's own
 * index -- and calls integrator_shade_surface_raytrace(kg, state, render_buffer).
 * Threads at or beyond work_size do nothing.
 *
 * NOTE(review): kg is not declared in this body and stack_buffer is never referenced
 * directly; both are presumably handled by HIPRT_INIT_KERNEL_GLOBAL() -- confirm against
 * that macro's definition. */
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
    ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
                             ccl_global const int *path_index_array,
                             ccl_global float *render_buffer,
                             const int work_size,
                             ccl_global hiprtGlobalStackBuffer stack_buffer)
{
  const int global_index = ccl_gpu_global_id_x();

  /* Threads outside the requested work size have no work item. */
  if (global_index >= work_size) {
    return;
  }

  HIPRT_INIT_KERNEL_GLOBAL()

  /* Without an index array the thread index doubles as the state index. */
  int state = global_index;
  if (path_index_array) {
    state = path_index_array[global_index];
  }

  ccl_gpu_kernel_call(integrator_shade_surface_raytrace(kg, state, render_buffer));
}
ccl_gpu_kernel_postfix
/* HIP-RT launch entry for integrator_shade_surface_mnee.
 *
 * A thread whose ccl_gpu_global_id_x() value is below work_size selects a state index --
 * read from path_index_array when that pointer is non-null, otherwise the thread's own
 * index -- and calls integrator_shade_surface_mnee(kg, state, render_buffer). Threads at
 * or beyond work_size do nothing.
 *
 * NOTE(review): kg is not declared in this body and stack_buffer is never referenced
 * directly; both are presumably handled by HIPRT_INIT_KERNEL_GLOBAL() -- confirm against
 * that macro's definition. */
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
    ccl_gpu_kernel_signature(integrator_shade_surface_mnee,
                             ccl_global const int *path_index_array,
                             ccl_global float *render_buffer,
                             const int work_size,
                             ccl_global hiprtGlobalStackBuffer stack_buffer)
{
  const int global_index = ccl_gpu_global_id_x();

  /* Threads outside the requested work size have no work item. */
  if (global_index >= work_size) {
    return;
  }

  HIPRT_INIT_KERNEL_GLOBAL()

  /* Without an index array the thread index doubles as the state index. */
  int state = global_index;
  if (path_index_array) {
    state = path_index_array[global_index];
  }

  ccl_gpu_kernel_call(integrator_shade_surface_mnee(kg, state, render_buffer));
}
ccl_gpu_kernel_postfix
#endif /* __HIPRT__ */