Cycles: oneAPI: use specialization constant to compile with/without Embree on GPU
This commit is contained in:
@@ -120,18 +120,16 @@ bool OneapiDevice::check_peer_access(Device * /*peer_device*/)
|
||||
return false;
|
||||
}
|
||||
|
||||
bool OneapiDevice::can_use_hardware_raytracing_for_features(uint kernel_features) const
|
||||
bool OneapiDevice::can_use_hardware_raytracing_for_features(uint requested_features) const
|
||||
{
|
||||
/* MNEE and Raytrace kernels currently don't work correctly with HWRT. */
|
||||
if ((kernel_features & KERNEL_FEATURE_MNEE || kernel_features & KERNEL_FEATURE_NODE_RAYTRACE)) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
return !(requested_features & (KERNEL_FEATURE_MNEE | KERNEL_FEATURE_NODE_RAYTRACE));
|
||||
}
|
||||
|
||||
BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint kernel_features) const
|
||||
BVHLayoutMask OneapiDevice::get_bvh_layout_mask(uint requested_features) const
|
||||
{
|
||||
return (use_hardware_raytracing && can_use_hardware_raytracing_for_features(kernel_features)) ?
|
||||
return (use_hardware_raytracing &&
|
||||
can_use_hardware_raytracing_for_features(requested_features)) ?
|
||||
BVH_LAYOUT_EMBREE :
|
||||
BVH_LAYOUT_BVH2;
|
||||
}
|
||||
@@ -175,8 +173,8 @@ bool OneapiDevice::load_kernels(const uint requested_features)
|
||||
}
|
||||
|
||||
if (use_hardware_raytracing && !can_use_hardware_raytracing_for_features(requested_features)) {
|
||||
VLOG_INFO << "Requested features don't work properly together with Hardware Raytracing yet "
|
||||
"in oneAPI backend. Hardware Raytracing is now disabled.";
|
||||
VLOG_INFO
|
||||
<< "Hardware ray tracing disabled, not supported yet by oneAPI for requested features.";
|
||||
use_hardware_raytracing = false;
|
||||
}
|
||||
|
||||
|
||||
@@ -118,7 +118,7 @@ class OneapiDevice : public Device {
|
||||
SyclQueue *sycl_queue();
|
||||
|
||||
protected:
|
||||
bool can_use_hwrt_for_features(uint kernel_features) const;
|
||||
bool can_use_hardware_raytracing_for_features(uint kernel_features) const;
|
||||
void check_usm(SyclQueue *queue, const void *usm_ptr, bool allow_host);
|
||||
bool create_queue(SyclQueue *&external_queue, int device_index, void *embree_device);
|
||||
void free_queue(SyclQueue *queue);
|
||||
|
||||
@@ -779,7 +779,7 @@ if(WITH_CYCLES_DEVICE_ONEAPI)
|
||||
# Host execution won't use GPU binaries, no need to compile them.
|
||||
if(WITH_CYCLES_ONEAPI_BINARIES AND NOT WITH_CYCLES_ONEAPI_HOST_TASK_EXECUTION)
|
||||
# AoT binaries aren't currently reused when calling sycl::build.
|
||||
list(APPEND sycl_compiler_flags -DSYCL_SKIP_KERNELS_PRELOAD)
|
||||
list(APPEND sycl_compiler_flags -DWITH_CYCLES_ONEAPI_BINARIES)
|
||||
# Iterate over all targest and their options
|
||||
list(JOIN CYCLES_ONEAPI_SYCL_TARGETS "," targets_string)
|
||||
list(APPEND sycl_compiler_flags -fsycl-targets=${targets_string})
|
||||
|
||||
@@ -21,6 +21,28 @@
|
||||
# define __BVH2__
|
||||
#endif
|
||||
|
||||
#if defined(__KERNEL_ONEAPI__) && defined(WITH_EMBREE_GPU)
|
||||
/* bool is apparently not tested for specialization constants:
|
||||
* https://github.com/intel/llvm/blob/39d1c65272a786b2b13a6f094facfddf9408406d/sycl/test/basic_tests/SYCL-2020-spec-constants.cpp#L25-L27
|
||||
* Instead of adding one more bool specialization constant, we reuse existing embree_features one
|
||||
* and use RTC_FEATURE_FLAG_NONE as value to test for avoiding to call Embree on GPU.
|
||||
*/
|
||||
/* We set it to RTC_FEATURE_FLAG_NONE by default so AoT binaries contain MNE and raytrace kernels
|
||||
* precompiled without Embree.
|
||||
* Changing this default value would require updating the logic in oneapi_load_kernels(). */
|
||||
static constexpr sycl::specialization_id<RTCFeatureFlags> oneapi_embree_features{
|
||||
RTC_FEATURE_FLAG_NONE};
|
||||
# define IF_USING_EMBREE \
|
||||
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() != \
|
||||
RTC_FEATURE_FLAG_NONE)
|
||||
# define IF_NOT_USING_EMBREE \
|
||||
if (kernel_handler.get_specialization_constant<oneapi_embree_features>() == \
|
||||
RTC_FEATURE_FLAG_NONE)
|
||||
#else
|
||||
# define IF_USING_EMBREE
|
||||
# define IF_NOT_USING_EMBREE
|
||||
#endif
|
||||
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifdef __BVH2__
|
||||
@@ -74,30 +96,39 @@ ccl_device_intersect bool scene_intersect(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect(kg, ray, visibility, isect);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect(kg, ray, visibility, isect);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
return bvh_intersect_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair(kg, ray, isect, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_hair(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
return bvh_intersect(kg, ray, isect, visibility);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Single object BVH traversal, for SSS/AO/bevel. */
|
||||
@@ -129,17 +160,27 @@ ccl_device_intersect bool scene_intersect_local(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_local(
|
||||
kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_local_motion(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
return bvh_intersect_local(kg, ray, local_isect, local_object, lcg_state, max_hits);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
@@ -184,35 +225,44 @@ ccl_device_intersect bool scene_intersect_shadow_all(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_shadow_all(
|
||||
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_shadow_all(
|
||||
kg, state, ray, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_shadow_all_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
return bvh_intersect_shadow_all_motion(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
# ifdef __HAIR__
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
if (kernel_data.bvh.have_curves) {
|
||||
return bvh_intersect_shadow_all_hair(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
# endif /* __HAIR__ */
|
||||
|
||||
return bvh_intersect_shadow_all(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
return bvh_intersect_shadow_all(
|
||||
kg, ray, state, visibility, max_hits, num_recorded_hits, throughput);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
# endif /* __SHADOW_RECORD_ALL__ */
|
||||
|
||||
@@ -239,13 +289,19 @@ ccl_device_intersect bool scene_intersect_volume(KernelGlobals kg,
|
||||
return false;
|
||||
}
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_motion(kg, ray, isect, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
return bvh_intersect_volume(kg, ray, isect, visibility);
|
||||
return bvh_intersect_volume(kg, ray, isect, visibility);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
# endif /* defined(__VOLUME__) && !defined(__VOLUME_RECORD_ALL__) */
|
||||
|
||||
@@ -275,18 +331,27 @@ ccl_device_intersect uint scene_intersect_volume(KernelGlobals kg,
|
||||
}
|
||||
|
||||
# ifdef __EMBREE__
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
|
||||
IF_USING_EMBREE
|
||||
{
|
||||
if (kernel_data.device_bvh) {
|
||||
return kernel_embree_intersect_volume(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
IF_NOT_USING_EMBREE
|
||||
{
|
||||
# ifdef __OBJECT_MOTION__
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
if (kernel_data.bvh.have_motion) {
|
||||
return bvh_intersect_volume_all_motion(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
# endif /* __OBJECT_MOTION__ */
|
||||
|
||||
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
|
||||
return bvh_intersect_volume_all(kg, ray, isect, max_hits, visibility);
|
||||
}
|
||||
|
||||
kernel_assert(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
# endif /* defined(__VOLUME__) && defined(__VOLUME_RECORD_ALL__) */
|
||||
|
||||
@@ -39,8 +39,6 @@ using numhit_t = uint32_t;
|
||||
#endif
|
||||
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
static constexpr sycl::specialization_id<RTCFeatureFlags> oneapi_embree_features{
|
||||
(const RTCFeatureFlags)(0)};
|
||||
# define CYCLES_EMBREE_USED_FEATURES \
|
||||
(kernel_handler.get_specialization_constant<oneapi_embree_features>())
|
||||
#else
|
||||
|
||||
@@ -191,6 +191,10 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_end.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_background,
|
||||
ccl_global const int *path_index_array,
|
||||
@@ -255,6 +259,12 @@ ccl_gpu_kernel_postfix
|
||||
constant int __dummy_constant [[function_constant(Kernel_DummyConstant)]];
|
||||
#endif
|
||||
|
||||
/* Kernels using intersections need access to the kernel handler for specialization constants to
|
||||
* work properly. */
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_begin.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_surface_raytrace,
|
||||
ccl_global const int *path_index_array,
|
||||
@@ -293,6 +303,9 @@ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
}
|
||||
}
|
||||
ccl_gpu_kernel_postfix
|
||||
#ifdef __KERNEL_ONEAPI__
|
||||
# include "kernel/device/oneapi/context_intersect_end.h"
|
||||
#endif
|
||||
|
||||
ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS)
|
||||
ccl_gpu_kernel_signature(integrator_shade_volume,
|
||||
|
||||
@@ -155,54 +155,93 @@ size_t oneapi_kernel_preferred_local_size(SyclQueue *queue,
|
||||
return std::min(limit_work_group_size, preferred_work_group_size);
|
||||
}
|
||||
|
||||
bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
||||
bool oneapi_kernel_is_required_for_features(const std::string &kernel_name,
|
||||
const uint kernel_features)
|
||||
{
|
||||
if ((kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0 &&
|
||||
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE)) !=
|
||||
std::string::npos)
|
||||
return false;
|
||||
if ((kernel_features & KERNEL_FEATURE_MNEE) == 0 &&
|
||||
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE)) !=
|
||||
std::string::npos)
|
||||
return false;
|
||||
if ((kernel_features & KERNEL_FEATURE_VOLUME) == 0 &&
|
||||
kernel_name.find(device_kernel_as_string(DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK)) !=
|
||||
std::string::npos)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool oneapi_kernel_is_using_embree(const std::string &kernel_name)
|
||||
{
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* MNEE and Raytrace kernels aren't yet enabled to use Embree. */
|
||||
for (int i = 0; i < (int)DEVICE_KERNEL_NUM; i++) {
|
||||
DeviceKernel kernel = (DeviceKernel)i;
|
||||
if (device_kernel_has_intersection(kernel)) {
|
||||
if (kernel_name.find(device_kernel_as_string(kernel)) != std::string::npos) {
|
||||
return !(kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE ||
|
||||
kernel == DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE);
|
||||
}
|
||||
}
|
||||
}
|
||||
# endif
|
||||
return false;
|
||||
}
|
||||
|
||||
bool oneapi_load_kernels(SyclQueue *queue_,
|
||||
const uint kernel_features,
|
||||
bool use_hardware_raytracing)
|
||||
{
|
||||
assert(queue_);
|
||||
sycl::queue *queue = reinterpret_cast<sycl::queue *>(queue_);
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* Preloading intersection kernels is mandatory with Embree on GPU execution,
|
||||
* because AoT will be not fully performant. */
|
||||
try {
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
|
||||
{queue->get_device()});
|
||||
/* For best performance, we always JIT compile the kernels that are using Embree. */
|
||||
if (use_hardware_raytracing) {
|
||||
try {
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> all_kernels_bundle =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(),
|
||||
{queue->get_device()});
|
||||
|
||||
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
|
||||
const std::string &kernel_name = kernel_id.get_name();
|
||||
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
|
||||
const std::string &kernel_name = kernel_id.get_name();
|
||||
|
||||
/* NOTE(@nsirgien): Names in this conditions below should match names from
|
||||
* oneapi_call macro in oneapi_enqueue_kernel below */
|
||||
/* Also, here we handle only intersection kernels (and skip the rest) */
|
||||
if (kernel_name.find("_intersect_") == std::string::npos) {
|
||||
continue;
|
||||
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
|
||||
!oneapi_kernel_is_using_embree(kernel_name)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||
|
||||
/* Hair requires embree curves support. */
|
||||
if (kernel_features & KERNEL_FEATURE_HAIR) {
|
||||
one_kernel_bundle_input
|
||||
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
|
||||
sycl::build(one_kernel_bundle_input);
|
||||
}
|
||||
else {
|
||||
one_kernel_bundle_input
|
||||
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
|
||||
sycl::build(one_kernel_bundle_input);
|
||||
}
|
||||
}
|
||||
|
||||
if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
|
||||
kernel_name.find("_intersect_volume") != std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
catch (sycl::exception const &e) {
|
||||
if (s_error_cb) {
|
||||
s_error_cb(e.what(), s_error_user_ptr);
|
||||
}
|
||||
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||
|
||||
one_kernel_bundle.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES);
|
||||
sycl::build(one_kernel_bundle);
|
||||
|
||||
one_kernel_bundle.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
CYCLES_ONEAPI_EMBREE_ALL_FEATURES);
|
||||
sycl::build(one_kernel_bundle);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
catch (sycl::exception const &e) {
|
||||
if (s_error_cb) {
|
||||
s_error_cb(e.what(), s_error_user_ptr);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef SYCL_SKIP_KERNELS_PRELOAD
|
||||
# ifdef WITH_CYCLES_ONEAPI_BINARIES
|
||||
(void)queue_;
|
||||
(void)kernel_features;
|
||||
# else
|
||||
@@ -214,27 +253,25 @@ bool oneapi_load_kernels(SyclQueue *queue_, const uint requested_features)
|
||||
for (const sycl::kernel_id &kernel_id : all_kernels_bundle.get_kernel_ids()) {
|
||||
const std::string &kernel_name = kernel_id.get_name();
|
||||
|
||||
/* NOTE(@nsirgien): Names in this conditions below should match names from
|
||||
* oneapi_call macro in oneapi_enqueue_kernel below */
|
||||
if (((requested_features & KERNEL_FEATURE_VOLUME) == 0) &&
|
||||
kernel_name.find("oneapi_kernel_integrator_shade_volume") != std::string::npos) {
|
||||
/* In case HWRT is on, compilation of kernels using Embree is already handled in previous
|
||||
* block. */
|
||||
if (!oneapi_kernel_is_required_for_features(kernel_name, kernel_features) ||
|
||||
(use_hardware_raytracing && oneapi_kernel_is_using_embree(kernel_name))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (((requested_features & KERNEL_FEATURE_MNEE) == 0) &&
|
||||
kernel_name.find("oneapi_kernel_integrator_shade_surface_mnee") != std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (((requested_features & KERNEL_FEATURE_NODE_RAYTRACE) == 0) &&
|
||||
kernel_name.find("oneapi_kernel_integrator_shade_surface_raytrace") !=
|
||||
std::string::npos) {
|
||||
continue;
|
||||
}
|
||||
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle =
|
||||
sycl::kernel_bundle<sycl::bundle_state::input> one_kernel_bundle_input =
|
||||
sycl::get_kernel_bundle<sycl::bundle_state::input>(queue->get_context(), {kernel_id});
|
||||
sycl::build(one_kernel_bundle);
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
/* This is expected to be the default, we set it again to be sure. */
|
||||
if (one_kernel_bundle_input
|
||||
.has_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>()) {
|
||||
one_kernel_bundle_input
|
||||
.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
RTC_FEATURE_FLAG_NONE);
|
||||
}
|
||||
# endif
|
||||
sycl::build(one_kernel_bundle_input);
|
||||
}
|
||||
}
|
||||
catch (sycl::exception const &e) {
|
||||
@@ -303,23 +340,22 @@ bool oneapi_enqueue_kernel(KernelContext *kernel_context,
|
||||
# pragma GCC diagnostic error "-Wswitch"
|
||||
# endif
|
||||
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
bool is_with_rthw_kernel = device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_CLOSEST ||
|
||||
device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SHADOW ||
|
||||
device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_SUBSURFACE ||
|
||||
device_kernel == DEVICE_KERNEL_INTEGRATOR_INTERSECT_VOLUME_STACK;
|
||||
const RTCFeatureFlags used_embree_features = (is_with_rthw_kernel && with_hwrt &&
|
||||
!with_curve_features) ?
|
||||
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
|
||||
CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
|
||||
# endif
|
||||
|
||||
try {
|
||||
queue->submit([&](sycl::handler &cgh) {
|
||||
# ifdef WITH_EMBREE_GPU
|
||||
if (is_with_rthw_kernel)
|
||||
/* Spec says it has no effect if the called kernel doesn't support the below specialization
|
||||
* constant but it can still trigger a recompilation, so we set it only if needed. */
|
||||
if (device_kernel_has_intersection(device_kernel)) {
|
||||
const RTCFeatureFlags used_embree_features = !use_hardware_raytracing ?
|
||||
RTC_FEATURE_FLAG_NONE :
|
||||
!(kernel_features & KERNEL_FEATURE_HAIR) ?
|
||||
CYCLES_ONEAPI_EMBREE_BASIC_FEATURES :
|
||||
CYCLES_ONEAPI_EMBREE_ALL_FEATURES;
|
||||
cgh.set_specialization_constant<ONEAPIKernelContext::oneapi_embree_features>(
|
||||
used_embree_features);
|
||||
}
|
||||
# else
|
||||
(void)kernel_features;
|
||||
# endif
|
||||
switch (device_kernel) {
|
||||
case DEVICE_KERNEL_INTEGRATOR_RESET: {
|
||||
|
||||
Reference in New Issue
Block a user