Cycles: oneAPI: Compile only needed device binaries in multi-GPU case

Before this change, the "oneapi_load_kernels" function loaded kernels
and, if needed, compiled them for all devices in the associated GPU
context. This makes sense when rendering with a single GPU, as well as
when rendering with multiple identical GPUs, but when Blender users
render with several different GPUs, the previous implementation would
compile all kernels for all devices once per device, unnecessarily
doing the same work multiple times. Because of this, I am changing the
implementation so that each used device now triggers compilation only
for itself, ensuring that no unnecessary work is done.

No render performance changes are expected.
This commit is contained in:
Nikita Sirgienko
2025-07-19 14:15:36 +02:00
parent 27af4a2f52
commit 9875836519

View File

@@ -248,7 +248,8 @@ bool oneapi_load_kernels(SyclQueue *queue_,
}
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
sycl::get_kernel_bundle<sycl::bundle_state::input>(
queue->get_context(), {queue->get_device()}, {kernel_id});
/* Hair requires embree curves support. */
if (kernel_features & KERNEL_FEATURE_HAIR) {
@@ -294,7 +295,8 @@ bool oneapi_load_kernels(SyclQueue *queue_,
# ifdef WITH_EMBREE_GPU
if (oneapi_kernel_has_intersections(kernel_name)) {
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
sycl::get_kernel_bundle<sycl::bundle_state::input>(
queue->get_context(), {queue->get_device()}, {kernel_id});
one_kernel_bundle_input
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
RTC_FEATURE_FLAG_NONE);
@@ -304,8 +306,8 @@ bool oneapi_load_kernels(SyclQueue *queue_,
# endif
/* This call will ensure that AoT or cached JIT binaries are available
* for execution. It will trigger compilation if it is not already the case. */
(void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(queue->get_context(),
{kernel_id});
(void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(
queue->get_context(), {queue->get_device()}, {kernel_id});
}
}
catch (const sycl::exception &e) {