2025-10-06 18:16:56 +02:00
|
|
|
/* SPDX-FileCopyrightText: 2021-2025 Intel Corporation
|
2023-06-14 16:52:36 +10:00
|
|
|
*
|
|
|
|
|
* SPDX-License-Identifier: Apache-2.0 */
|
2022-06-29 12:58:04 +02:00
|
|
|
|
|
|
|
|
#ifdef WITH_ONEAPI
|
|
|
|
|
|
|
|
|
|
# include "device/oneapi/queue.h"
|
|
|
|
|
# include "device/oneapi/device_impl.h"
|
2025-10-06 18:16:56 +02:00
|
|
|
# include "device/oneapi/graphics_interop.h"
|
2022-06-29 12:58:04 +02:00
|
|
|
# include "util/log.h"
|
|
|
|
|
|
|
|
|
|
# include "kernel/device/oneapi/kernel.h"
|
|
|
|
|
|
|
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
|
|
|
|
|
/* Per-kernel bookkeeping record; both fields start at zero. */
struct KernelExecutionInfo {
  /* Accumulated elapsed value (presumably time; the unit is not visible here). */
  double elapsed_summary{0.0};
  /* Number of enqueues recorded for the kernel. */
  int enqueue_count{0};
};
|
|
|
|
|
|
|
|
|
|
/* OneapiDeviceQueue */
|
|
|
|
|
|
|
|
|
|
/* Create a queue for `device`: the pointer is forwarded to the DeviceQueue base class and also
 * stored in `oneapi_device_`. No other work is done at construction time. */
OneapiDeviceQueue::OneapiDeviceQueue(OneapiDevice *device)
    : DeviceQueue(device), oneapi_device_(device)
{
}
|
|
|
|
|
|
|
|
|
|
int OneapiDeviceQueue::num_concurrent_states(const size_t state_size) const
|
|
|
|
|
{
|
2024-07-18 15:46:17 +02:00
|
|
|
int num_states = 4 * num_concurrent_busy_states(state_size);
|
2022-06-29 12:58:04 +02:00
|
|
|
|
2025-08-18 20:22:44 +02:00
|
|
|
LOG_TRACE << "GPU queue concurrent states: " << num_states << ", using up to "
|
2025-07-10 19:44:14 +02:00
|
|
|
<< string_human_readable_size(num_states * state_size);
|
2022-06-29 12:58:04 +02:00
|
|
|
|
|
|
|
|
return num_states;
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-24 10:23:56 +01:00
|
|
|
int OneapiDeviceQueue::num_concurrent_busy_states(const size_t /*state_size*/) const
|
2022-06-29 12:58:04 +02:00
|
|
|
{
|
2022-07-27 09:38:19 +02:00
|
|
|
const int max_num_threads = oneapi_device_->get_num_multiprocessors() *
|
|
|
|
|
oneapi_device_->get_max_num_threads_per_multiprocessor();
|
|
|
|
|
|
|
|
|
|
return 4 * max(8 * max_num_threads, 65536);
|
2022-06-29 12:58:04 +02:00
|
|
|
}
|
|
|
|
|
|
2025-06-18 08:21:19 +02:00
|
|
|
int OneapiDeviceQueue::num_sort_partitions(int max_num_paths, uint /*max_scene_shaders*/) const
|
2024-01-31 17:25:34 +01:00
|
|
|
{
|
2025-06-18 08:21:19 +02:00
|
|
|
int sort_partition_elements = (oneapi_device_->get_max_num_threads_per_multiprocessor() >= 128) ?
|
|
|
|
|
65536 :
|
|
|
|
|
8192;
|
|
|
|
|
/* Sort partitioning with local sorting on Intel GPUs is currently the most effective solution no
|
|
|
|
|
* matter the number of shaders. */
|
|
|
|
|
return max(max_num_paths / sort_partition_elements, 1);
|
2024-01-31 17:25:34 +01:00
|
|
|
}
|
|
|
|
|
|
2022-06-29 12:58:04 +02:00
|
|
|
void OneapiDeviceQueue::init_execution()
|
|
|
|
|
{
|
|
|
|
|
oneapi_device_->load_texture_info();
|
|
|
|
|
|
|
|
|
|
SyclQueue *device_queue = oneapi_device_->sycl_queue();
|
2024-12-26 17:53:59 +01:00
|
|
|
void *kg_dptr = oneapi_device_->kernel_globals_device_pointer();
|
2022-06-29 12:58:04 +02:00
|
|
|
assert(device_queue);
|
|
|
|
|
assert(kg_dptr);
|
2024-12-29 23:13:45 +01:00
|
|
|
kernel_context_ = make_unique<KernelContext>();
|
|
|
|
|
kernel_context_->queue = device_queue;
|
|
|
|
|
kernel_context_->kernel_globals = kg_dptr;
|
2022-06-29 12:58:04 +02:00
|
|
|
|
|
|
|
|
debug_init_execution();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Launch `kernel` on the device with the given work size and arguments.
 *
 * Returns false without launching when the device already has an error, or when texture info
 * was reloaded and the following synchronize() failed. Otherwise returns the result of the
 * device's enqueue_kernel() call; on failure the device error is set to a message naming the
 * kernel and the runtime exception text. */
bool OneapiDeviceQueue::enqueue(DeviceKernel kernel,
                                const int signed_kernel_work_size,
                                const DeviceKernelArguments &_args)
{
  if (oneapi_device_->have_error()) {
    return false;
  }

  /* Update texture info in case memory moved to host. */
  if (oneapi_device_->load_texture_info() && !synchronize()) {
    return false;
  }

  void **args = const_cast<void **>(_args.values);

  debug_enqueue_begin(kernel, signed_kernel_work_size);
  assert(signed_kernel_work_size >= 0);

  /* The global size may be adjusted and the local size is filled in by the device below. */
  size_t kernel_global_size = static_cast<size_t>(signed_kernel_work_size);
  size_t kernel_local_size;

  assert(kernel_context_);
  kernel_context_->scene_max_shaders = oneapi_device_->scene_max_shaders();

  oneapi_device_->get_adjusted_global_and_local_sizes(
      kernel_context_->queue, kernel, kernel_global_size, kernel_local_size);

  /* Call the oneAPI kernel DLL to launch the requested kernel. */
  const bool is_finished_ok = oneapi_device_->enqueue_kernel(
      kernel_context_.get(), kernel, kernel_global_size, kernel_local_size, args);

  if (!is_finished_ok) {
    const std::string kernel_name(device_kernel_as_string(kernel));
    oneapi_device_->set_error("oneAPI kernel \"" + kernel_name +
                              "\" execution error: got runtime exception \"" +
                              oneapi_device_->oneapi_error_message() + "\"");
  }

  debug_enqueue_end();

  return is_finished_ok;
}
|
|
|
|
|
|
|
|
|
|
bool OneapiDeviceQueue::synchronize()
|
|
|
|
|
{
|
|
|
|
|
if (oneapi_device_->have_error()) {
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
|
2022-10-06 18:35:51 +02:00
|
|
|
bool is_finished_ok = oneapi_device_->queue_synchronize(oneapi_device_->sycl_queue());
|
2024-12-26 17:53:59 +01:00
|
|
|
if (is_finished_ok == false) {
|
2022-06-29 12:58:04 +02:00
|
|
|
oneapi_device_->set_error("oneAPI unknown kernel execution error: got runtime exception \"" +
|
|
|
|
|
oneapi_device_->oneapi_error_message() + "\"");
|
2024-12-26 17:53:59 +01:00
|
|
|
}
|
2022-06-29 12:58:04 +02:00
|
|
|
|
|
|
|
|
debug_synchronize();
|
|
|
|
|
|
|
|
|
|
return !(oneapi_device_->have_error());
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Zero `mem` by forwarding to the device's mem_zero(). */
void OneapiDeviceQueue::zero_to_device(device_memory &mem)
{
  oneapi_device_->mem_zero(mem);
}
|
|
|
|
|
|
|
|
|
|
/* Copy `mem` to the device by forwarding to the device's mem_copy_to(). */
void OneapiDeviceQueue::copy_to_device(device_memory &mem)
{
  oneapi_device_->mem_copy_to(mem);
}
|
|
|
|
|
|
|
|
|
|
/* Copy `mem` back from the device by forwarding to the device's mem_copy_from(). */
void OneapiDeviceQueue::copy_from_device(device_memory &mem)
{
  oneapi_device_->mem_copy_from(mem);
}
|
|
|
|
|
|
2025-10-06 18:16:56 +02:00
|
|
|
# ifdef SYCL_LINEAR_MEMORY_INTEROP_AVAILABLE
|
|
|
|
|
/* Create a oneAPI graphics interop object constructed with this queue, returned through the
 * generic DeviceGraphicsInterop interface. */
unique_ptr<DeviceGraphicsInterop> OneapiDeviceQueue::graphics_interop_create()
{
  unique_ptr<DeviceGraphicsInterop> interop = make_unique<OneapiDeviceGraphicsInterop>(this);
  return interop;
}
|
|
|
|
|
# endif
|
|
|
|
|
|
2022-06-29 12:58:04 +02:00
|
|
|
CCL_NAMESPACE_END
|
|
|
|
|
|
|
|
|
|
#endif /* WITH_ONEAPI */
|