This change switches Cycles to an open-source HIP-RT library which
implements hardware ray-tracing. This library is now used on
both Windows and Linux. While there should be no noticeable changes
on Windows, on Linux this adds support for hardware ray-tracing on
AMD GPUs.
The majority of the change is typical platform code to add a new
library to the dependency builder, and a change in how
ahead-of-time (AoT) kernels are compiled. There are changes in
Cycles itself, but they are rather straightforward: some APIs
changed in the open-source version of the library.
There are a couple of extra files which are needed for this to
work: hiprt02003_6.1_amd.hipfb and oro_compiled_kernels.hipfb.
The HIP-RT library makes some assumptions about where these files
are located. Currently they follow the same rule as AoT
kernels for oneAPI:
- On Windows they are next to blender.exe
- On Linux they are in the lib/ folder
Performance comparison on Ubuntu 22.04.5:
```
GPU: AMD Radeon PRO W7800
Driver: amdgpu-install_6.1.60103-1_all.deb
main hip-rt
attic 0.1414s 0.0932s
barbershop_interior 0.1563s 0.1258s
bistro 0.2134s 0.1597s
bmw27 0.0119s 0.0099s
classroom 0.1006s 0.0803s
fishy_cat 0.0248s 0.0178s
junkshop 0.0916s 0.0713s
koro 0.0589s 0.0720s
monster 0.0435s 0.0385s
pabellon 0.0543s 0.0391s
sponza 0.0223s 0.0180s
spring 0.1026s 1.5145s
victor 0.1901s 0.1239s
wdas_cloud 0.1153s 0.1125s
```
Co-authored-by: Brecht Van Lommel <brecht@blender.org>
Co-authored-by: Ray Molenkamp <github@lazydodo.com>
Co-authored-by: Sergey Sharybin <sergey@blender.org>
Pull Request: https://projects.blender.org/blender/blender/pulls/121050
119 lines
4.6 KiB
C
119 lines
4.6 KiB
C
/* SPDX-FileCopyrightText: 2011-2022 Blender Foundation
|
|
*
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
|
|
|
#ifdef __HIPRT__
|
|
|
|
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
|
ccl_gpu_kernel_signature(integrator_intersect_closest,
|
|
ccl_global const int *path_index_array,
|
|
ccl_global float *render_buffer,
|
|
const int work_size,
|
|
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
|
{
|
|
const int global_index = ccl_gpu_global_id_x();
|
|
|
|
if (global_index < work_size) {
|
|
HIPRT_INIT_KERNEL_GLOBAL()
|
|
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
|
|
ccl_gpu_kernel_call(integrator_intersect_closest(kg, state, render_buffer));
|
|
}
|
|
}
|
|
ccl_gpu_kernel_postfix
|
|
|
|
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
|
ccl_gpu_kernel_signature(integrator_intersect_shadow,
|
|
ccl_global const int *path_index_array,
|
|
const int work_size,
|
|
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
|
{
|
|
const int global_index = ccl_gpu_global_id_x();
|
|
|
|
if (global_index < work_size) {
|
|
HIPRT_INIT_KERNEL_GLOBAL()
|
|
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
|
|
ccl_gpu_kernel_call(integrator_intersect_shadow(kg, state));
|
|
}
|
|
}
|
|
ccl_gpu_kernel_postfix
|
|
|
|
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
|
ccl_gpu_kernel_signature(integrator_intersect_subsurface,
|
|
ccl_global const int *path_index_array,
|
|
const int work_size,
|
|
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
|
{
|
|
const int global_index = ccl_gpu_global_id_x();
|
|
|
|
if (global_index < work_size) {
|
|
HIPRT_INIT_KERNEL_GLOBAL()
|
|
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
|
|
ccl_gpu_kernel_call(integrator_intersect_subsurface(kg, state));
|
|
}
|
|
}
|
|
ccl_gpu_kernel_postfix
|
|
|
|
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
|
ccl_gpu_kernel_signature(integrator_intersect_volume_stack,
|
|
ccl_global const int *path_index_array,
|
|
const int work_size,
|
|
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
|
{
|
|
const int global_index = ccl_gpu_global_id_x();
|
|
|
|
if (global_index < work_size) {
|
|
HIPRT_INIT_KERNEL_GLOBAL()
|
|
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
|
|
ccl_gpu_kernel_call(integrator_intersect_volume_stack(kg, state));
|
|
}
|
|
}
|
|
|
|
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
|
ccl_gpu_kernel_signature(integrator_intersect_dedicated_light,
|
|
ccl_global const int *path_index_array,
|
|
const int work_size,
|
|
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
|
{
|
|
const int global_index = ccl_gpu_global_id_x();
|
|
|
|
if (global_index < work_size) {
|
|
HIPRT_INIT_KERNEL_GLOBAL()
|
|
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
|
|
ccl_gpu_kernel_call(integrator_intersect_dedicated_light(kg, state));
|
|
}
|
|
}
|
|
|
|
ccl_gpu_kernel_postfix
|
|
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
|
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
|
|
ccl_global const int *path_index_array,
|
|
ccl_global float *render_buffer,
|
|
const int work_size,
|
|
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
|
{
|
|
const int global_index = ccl_gpu_global_id_x();
|
|
if (global_index < work_size) {
|
|
HIPRT_INIT_KERNEL_GLOBAL()
|
|
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
|
|
ccl_gpu_kernel_call(integrator_shade_surface_raytrace(kg, state, render_buffer));
|
|
}
|
|
}
|
|
ccl_gpu_kernel_postfix
|
|
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
|
|
ccl_gpu_kernel_signature(integrator_shade_surface_mnee,
|
|
ccl_global const int *path_index_array,
|
|
ccl_global float *render_buffer,
|
|
const int work_size,
|
|
ccl_global hiprtGlobalStackBuffer stack_buffer)
|
|
{
|
|
const int global_index = ccl_gpu_global_id_x();
|
|
if (global_index < work_size) {
|
|
HIPRT_INIT_KERNEL_GLOBAL()
|
|
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
|
|
ccl_gpu_kernel_call(integrator_shade_surface_mnee(kg, state, render_buffer));
|
|
}
|
|
}
|
|
ccl_gpu_kernel_postfix
|
|
|
|
#endif /* __HIPRT__ */
|