Files
test/intern/cycles/device/hiprt/queue.cpp
Sahar A. Kashi 557a245dd5 Cycles: add HIP RT device, for AMD hardware ray tracing on Windows
HIP RT enables AMD hardware ray tracing on RDNA2 and above, and falls back to a
shader implementation for older graphics cards. It offers an average 25%
sample rendering rate improvement in Cycles benchmarks, on a W6800 card.

The ray tracing feature functions are accessed through HIP RT SDK, available on
GPUOpen. HIP RT traversal functionality is pre-compiled in bitcode format and
shipped with the SDK.

This is not yet enabled as there are issues to be resolved, but landing the
code now makes testing and further changes easier.

Known limitations:
* Not working yet with current public AMD drivers.
* Visual artifact in motion blur.
* One of the buffers allocated for traversal has a static size. Allocating it
  dynamically would reduce memory usage.
* This is for Windows only currently, no Linux support.

Co-authored-by: Brecht Van Lommel <brecht@blender.org>

Ref #105538
2023-04-25 20:19:43 +02:00

69 lines
2.1 KiB
C++

/* SPDX-License-Identifier: Apache-2.0
* Copyright 2011-2022 Blender Foundation */
#ifdef WITH_HIPRT
# include "device/hiprt/queue.h"
# include "device/hip/graphics_interop.h"
# include "device/hip/kernel.h"
# include "device/hiprt/device_impl.h"
CCL_NAMESPACE_BEGIN
/* Build a queue for `device`: the HIP base queue is initialized with the same device viewed as a
 * HIPDevice, and the HIPRT-typed pointer is kept in `hiprt_device_` for use by enqueue(). */
HIPRTDeviceQueue::HIPRTDeviceQueue(HIPRTDevice *device)
    : HIPDeviceQueue(static_cast<HIPDevice *>(device)), hiprt_device_(device)
{
}
/* Enqueue `kernel` for `work_size` work items on this queue's HIP stream.
 *
 * Kernels for which device_kernel_has_intersection() is false are forwarded unchanged to
 * HIPDeviceQueue::enqueue(). For the remaining kernels the device's global stack buffer is
 * allocated and zeroed on first use, a reference to its device pointer is appended to a copy of
 * `args`, and the kernel is launched here on a one-dimensional grid with
 * HIPRT_THREAD_GROUP_SIZE threads per block.
 *
 * Returns false when the device already reports an error on entry, or reports one after the
 * launch; true otherwise. */
bool HIPRTDeviceQueue::enqueue(DeviceKernel kernel,
                               const int work_size,
                               DeviceKernelArguments const &args)
{
  if (hiprt_device_->have_error()) {
    return false;
  }

  if (!device_kernel_has_intersection(kernel)) {
    return HIPDeviceQueue::enqueue(kernel, work_size, args);
  }

  debug_enqueue_begin(kernel, work_size);

  const HIPContextScope scope(hiprt_device_);
  const HIPDeviceKernel &hip_kernel = hiprt_device_->kernels.get(kernel);

  /* Allocate the global stack buffer the first time an intersection kernel is enqueued. */
  if (!hiprt_device_->global_stack_buffer.device_pointer) {
    const int max_path = num_concurrent_states(0);
    /* Lead with the `sizeof` operand so the whole product is evaluated in size_t. Multiplying the
     * two int operands first could overflow int for large state counts. */
    hiprt_device_->global_stack_buffer.alloc(sizeof(int) * max_path * HIPRT_SHARED_STACK_SIZE);
    hiprt_device_->global_stack_buffer.zero_to_device();
  }

  /* `args` is const, so the extra stack buffer argument is appended to a local copy. */
  DeviceKernelArguments args_copy = args;
  args_copy.add(&hiprt_device_->global_stack_buffer.device_pointer);

  /* Compute kernel launch parameters. */
  const int num_threads_per_block = HIPRT_THREAD_GROUP_SIZE;
  const int num_blocks = divide_up(work_size, num_threads_per_block);
  const int shared_mem_bytes = 0;

  assert_success(hipModuleLaunchKernel(hip_kernel.function,
                                       num_blocks,
                                       1,
                                       1,
                                       num_threads_per_block,
                                       1,
                                       1,
                                       shared_mem_bytes,
                                       hip_stream_,
                                       const_cast<void **>(args_copy.values),
                                       0),
                 "enqueue");

  return !(hiprt_device_->have_error());
}
CCL_NAMESPACE_END
#endif /* WITH_HIPRT */