Cycles: oneAPI: Compile only needed device binaries in multi-GPU case
Before this change, the "oneapi_load_kernels" function loaded kernels and, if needed, compiled them for all devices in the associated GPU context. This makes sense when rendering with a single GPU, or with multiple identical GPUs, but when Blender users render with several different GPUs, the previous implementation compiled all kernels for all devices once per device, unnecessarily doing the same work multiple times. Because of this, I am changing the implementation so that each used device now compiles kernels only for itself, ensuring that no unnecessary work is done. No render performance changes are expected.
This commit is contained in:
@@ -248,7 +248,8 @@ bool oneapi_load_kernels(SyclQueue *queue_,
|
||||
}
|
||||
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(
|
||||
queue->get_context(), {queue->get_device()}, {kernel_id});
|
||||
|
||||
/* Hair requires embree curves support. */
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR) {
|
||||
@@ -294,7 +295,8 @@ bool oneapi_load_kernels(SyclQueue *queue_,
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
if (oneapi_kernel_has_intersections(kernel_name)) {
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(
|
||||
queue->get_context(), {queue->get_device()}, {kernel_id});
|
||||
one_kernel_bundle_input
|
||||
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
RTC_FEATURE_FLAG_NONE);
|
||||
@@ -304,8 +306,8 @@ bool oneapi_load_kernels(SyclQueue *queue_,
|
||||
# endif
|
||||
/* This call will ensure that AoT or cached JIT binaries are available
|
||||
* for execution. It will trigger compilation if it is not already the case. */
|
||||
(void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(queue->get_context(),
|
||||
{kernel_id});
|
||||
(void)sycl::get_kernel_bundle<sycl::bundle_state::executable>(
|
||||
queue->get_context(), {queue->get_device()}, {kernel_id});
|
||||
}
|
||||
}
|
||||
catch (const sycl::exception &e) {
|
||||
|
||||
Reference in New Issue
Block a user