HIP RT enables AMD hardware ray tracing on RDNA2 and above, and falls back to a shader implementation for older graphics cards. It offers an average 25% sample rendering rate improvement in Cycles benchmarks on a W6800 card. The ray tracing feature functions are accessed through the HIP RT SDK, available on GPUOpen. HIP RT traversal functionality is pre-compiled in bitcode format and shipped with the SDK. This is not yet enabled as there are issues to be resolved, but landing the code now makes testing and further changes easier. Known limitations: * Not working yet with current public AMD drivers. * Visual artifacts in motion blur. * One of the buffers allocated for traversal has a static size. Allocating it dynamically would reduce memory usage. * This is currently for Windows only; there is no Linux support. Co-authored-by: Brecht Van Lommel <brecht@blender.org> Ref #105538
69 lines
2.1 KiB
C++
69 lines
2.1 KiB
C++
/* SPDX-License-Identifier: Apache-2.0
|
|
* Copyright 2011-2022 Blender Foundation */
|
|
|
|
#ifdef WITH_HIPRT
|
|
|
|
# include "device/hiprt/queue.h"
|
|
|
|
# include "device/hip/graphics_interop.h"
|
|
# include "device/hip/kernel.h"
|
|
# include "device/hiprt/device_impl.h"
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
/* Construct a queue bound to `device`.
 *
 * The base HIPDeviceQueue receives the device as a HIPDevice pointer, while the HIP RT specific
 * pointer is kept in `hiprt_device_` for use by enqueue(). */
HIPRTDeviceQueue::HIPRTDeviceQueue(HIPRTDevice *device)
    /* A named cast keeps the conversion compile-time checked, unlike a C-style cast which would
     * silently degrade to a reinterpret cast if the two types were unrelated. */
    : HIPDeviceQueue(static_cast<HIPDevice *>(device)), hiprt_device_(device)
{
}
|
|
|
|
/* Enqueue `kernel` for `work_size` work items on this queue's HIP stream.
 *
 * Kernels for which device_kernel_has_intersection() is false are forwarded unchanged to
 * HIPDeviceQueue::enqueue(). All other kernels are launched here with one extra trailing
 * argument, the device pointer of the device's global stack buffer, using
 * HIPRT_THREAD_GROUP_SIZE threads per block.
 *
 * Returns false when the device already reports an error on entry, or reports one after the
 * launch has been issued; true otherwise. */
bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel,
                               const int work_size,
                               DeviceKernelArguments const &args)
{
  if (hiprt_device_->have_error()) {
    return false;
  }

  /* Only kernels with intersection need the extra stack buffer argument. */
  if (!device_kernel_has_intersection(kernel)) {
    return HIPDeviceQueue::enqueue(kernel, work_size, args);
  }

  debug_enqueue_begin(kernel, work_size);

  const HIPContextScope scope(hiprt_device_);
  const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);

  /* Allocate and zero the global stack buffer the first time it is needed; later calls reuse
   * it. */
  if (!hiprt_device_->global_stack_buffer.device_pointer) {
    const int max_path = num_concurrent_states(0);
    /* Widen before multiplying so the leading product is not evaluated in `int`. */
    const size_t stack_buffer_size = static_cast<size_t>(max_path) * HIPRT_SHARED_STACK_SIZE *
                                     sizeof(int);
    hiprt_device_->global_stack_buffer.alloc(stack_buffer_size);
    hiprt_device_->global_stack_buffer.zero_to_device();
  }

  /* `args` is const, so append the stack buffer pointer to a local copy. */
  DeviceKernelArguments args_copy = args;
  args_copy.add(&hiprt_device_->global_stack_buffer.device_pointer);

  /* Compute kernel launch parameters. */
  const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE;
  const int num_blocks = divide_up(work_size, num_threads_per_block);
  const int shared_mem_bytes = 0;

  /* One-dimensional launch: only the X extent of the grid and of the block is larger than 1. */
  assert_success(hipModuleLaunchKernel(hip_kernel.function,
                                       num_blocks,
                                       1,
                                       1,
                                       num_threads_per_block,
                                       1,
                                       1,
                                       shared_mem_bytes,
                                       hip_stream_,
                                       const_cast<void **>(args_copy.values),
                                       nullptr),
                 "enqueue");

  /* Pair with debug_enqueue_begin() above. */
  debug_enqueue_end();

  return !(hiprt_device_->have_error());
}
|
|
|
|
CCL_NAMESPACE_END
|
|
|
|
#endif /* WITH_HIPRT */
|