Files
test/intern/cycles/kernel/device/hiprt/hiprt_kernels.h
Sahar A. Kashi 557a245dd5 Cycles: add HIP RT device, for AMD hardware ray tracing on Windows
HIP RT enables AMD hardware ray tracing on RDNA2 and above, and falls back to a
to shader implementation for older graphics cards. It offers an average 25%
sample rendering rate improvement in Cycles benchmarks, on a W6800 card.

The ray tracing feature functions are accessed through HIP RT SDK, available on
GPUOpen. HIP RT traversal functionality is pre-compiled in bitcode format and
shipped with the SDK.

This is not yet enabled as there are issues to be resolved, but landing the
code now makes testing and further changes easier.

Known limitations:
* Not working yet with current public AMD drivers.
* Visual artifact in motion blur.
* One of the buffers allocated for traversal has a static size. Allocating it
  dynamically would reduce memory usage.
* This is for Windows only currently, no Linux support.

Co-authored-by: Brecht Van Lommel <brecht@blender.org>

Ref #105538
2023-04-25 20:19:43 +02:00

102 lines
3.9 KiB
C

/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef __HIPRT__
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_closest,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_closest(kg, state, render_buffer));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_shadow,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_shadow(kg, state));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_subsurface,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_subsurface(kg, state));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_intersect_volume_stack,
ccl_global const int *path_index_array,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_intersect_volume_stack(kg, state));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_shade_surface_raytrace(kg, state, render_buffer));
}
}
ccl_gpu_kernel_postfix
ccl_gpu_kernel_threads(GPU_HIPRT_KERNEL_BLOCK_NUM_THREADS)
ccl_gpu_kernel_signature(integrator_shade_surface_mnee,
ccl_global const int *path_index_array,
ccl_global float *render_buffer,
const int work_size,
ccl_global int *stack_buffer)
{
const int global_index = ccl_gpu_global_id_x();
if (global_index < work_size) {
HIPRT_INIT_KERNEL_GLOBAL()
const int state = (path_index_array) ? path_index_array[global_index] : global_index;
ccl_gpu_kernel_call(integrator_shade_surface_mnee(kg, state, render_buffer));
}
}
ccl_gpu_kernel_postfix
#endif /* __HIPRT__ */