Cycles: Show device optimization status in preferences for oneAPI

With these changes, we can now mark devices that are expected to perform as
well as possible, as well as devices that were not optimized for some reason.

For example, a device may have been released after the Blender release,
making it impossible for developers to optimize for it in code that has
already shipped and cannot be changed. This is primarily relevant for the LTS
versions, which are supported for two years and need to clearly communicate
the optimization status of new devices released during that time.

This is implemented for oneAPI devices. Other device types are currently
marked as optimized to preserve the old behavior, but may implement the same
mechanism in the future.

Pull Request: https://projects.blender.org/blender/blender/pulls/139751
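
For illustration, a minimal standalone sketch (hypothetical helper, not the actual Cycles code) of the user-visible rule introduced here: devices that were not optimized for get an explicit suffix in their preferences label.

#include <iostream>
#include <string>

/* Hypothetical helper mirroring how the preferences build a device label from
 * the new per-device flag: unoptimized devices get an explicit suffix. */
static std::string device_label(const std::string &name, const bool has_execution_optimization)
{
  return has_execution_optimization ? name : name + " (Unoptimized Performance)";
}

int main()
{
  std::cout << device_label("Intel Arc A770 Graphics", true) << "\n";
  std::cout << device_label("Hypothetical future Intel GPU", false) << "\n";
}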
Nikita Sirgienko authored on 2025-06-03 20:07:52 +02:00; committed by Brecht Van Lommel.
Parent: 6805385cbe
Commit: 69091c5028
8 changed files with 119 additions and 19 deletions

View File

@@ -740,6 +740,8 @@ This option is only for debugging purposes."
# https://www.intel.com/content/www/us/en/develop/documentation/oneapi-dpcpp-cpp-compiler-dev-guide-and-reference/top/compilation/ahead-of-time-compilation.html
# The supported devices can be retrieved from `ocloc` output when running
# `ocloc compile --help`.
# If you have completed optimization work and now want to enable AoT for new Intel devices,
# update the optimization status in OneapiDevice::architecture_information.
set(CYCLES_ONEAPI_INTEL_BINARIES_ARCH dg2 mtl lnl bmg CACHE STRING "\
oneAPI Intel GPU architectures to build binaries for"
)

View File

@@ -269,7 +269,7 @@ def enum_openimagedenoise_denoiser(self, context):
def enum_optix_denoiser(self, context):
if not context or bool(context.preferences.addons[__package__].preferences.get_devices_for_type('OPTIX')):
if not context or bool(context.preferences.addons[__package__].preferences.get_device_list('OPTIX')):
return [('OPTIX', "OptiX", n_(
"Use the OptiX AI denoiser with GPU acceleration, only available on NVIDIA GPUs when configured in the system tab in the user preferences"), 2)]
return []
@@ -1668,16 +1668,19 @@ class CyclesPreferences(bpy.types.AddonPreferences):
# Update name in case it changed
entry.name = device[0]
# Gets all devices types for a compute device type.
def get_devices_for_type(self, compute_device_type):
# Gets all device types to display in the preferences for a compute device type.
# This includes the CPU device.
def get_devices_for_type(self, compute_device_type, device_list=None):
# Layout of the device tuples: (Name, Type, Persistent ID)
device_list = self.get_device_list(compute_device_type)
if device_list is None:
device_list = self.get_device_list(compute_device_type)
# Sort entries into lists
devices = []
cpu_devices = []
for device in device_list:
entry = self.find_existing_device_entry(device)
entry.optimized = device[7]
if entry.type == compute_device_type:
devices.append(entry)
elif entry.type == 'CPU':
@@ -1768,9 +1771,20 @@ class CyclesPreferences(bpy.types.AddonPreferences):
return False
def _draw_devices(self, layout, device_type, devices):
@staticmethod
def _format_device_name(name):
import unicodedata
return name.replace('(TM)', unicodedata.lookup('TRADE MARK SIGN')) \
.replace('(tm)', unicodedata.lookup('TRADE MARK SIGN')) \
.replace('(R)', unicodedata.lookup('REGISTERED SIGN')) \
.replace('(C)', unicodedata.lookup('COPYRIGHT SIGN'))
def _draw_devices(self, layout, device_type, device_list):
box = layout.box()
# Get preference devices, including CPU.
devices = self.get_devices_for_type(device_type, device_list)
found_device = False
for device in devices:
if device.type == device_type:
@@ -1839,15 +1853,10 @@ class CyclesPreferences(bpy.types.AddonPreferences):
return
for device in devices:
import unicodedata
box.prop(
device, "use", text=device.name
.replace('(TM)', unicodedata.lookup('TRADE MARK SIGN'))
.replace('(tm)', unicodedata.lookup('TRADE MARK SIGN'))
.replace('(R)', unicodedata.lookup('REGISTERED SIGN'))
.replace('(C)', unicodedata.lookup('COPYRIGHT SIGN')),
translate=False
)
name = self._format_device_name(device.name)
if not device.optimized:
name += rpt_(" (Unoptimized Performance)")
box.prop(device, "use", text=name, translate=False)
def draw_impl(self, layout, context):
row = layout.row()
@@ -1857,14 +1866,14 @@ class CyclesPreferences(bpy.types.AddonPreferences):
if compute_device_type == 'NONE':
return
row = layout.row()
devices = self.get_devices_for_type(compute_device_type)
devices = self.get_device_list(compute_device_type)
self._draw_devices(row, compute_device_type, devices)
import _cycles
has_peer_memory = False
has_enabled_hardware_rt = False
has_disabled_hardware_rt = False
for device in self.get_device_list(compute_device_type):
for device in devices:
if not self.find_existing_device_entry(device).use:
continue
if device[1] != compute_device_type:

View File

@@ -440,7 +440,7 @@ static PyObject *available_devices_func(PyObject * /*self*/, PyObject *args)
for (size_t i = 0; i < devices.size(); i++) {
const DeviceInfo &device = devices[i];
const string type_name = Device::string_from_type(device.type);
PyObject *device_tuple = PyTuple_New(7);
PyObject *device_tuple = PyTuple_New(8);
PyTuple_SET_ITEM(device_tuple, 0, pyunicode_from_string(device.description.c_str()));
PyTuple_SET_ITEM(device_tuple, 1, pyunicode_from_string(type_name.c_str()));
PyTuple_SET_ITEM(device_tuple, 2, pyunicode_from_string(device.id.c_str()));
@@ -449,6 +449,7 @@ static PyObject *available_devices_func(PyObject * /*self*/, PyObject *args)
PyTuple_SET_ITEM(
device_tuple, 5, PyBool_FromLong(device.denoisers & DENOISER_OPENIMAGEDENOISE));
PyTuple_SET_ITEM(device_tuple, 6, PyBool_FromLong(device.denoisers & DENOISER_OPTIX));
PyTuple_SET_ITEM(device_tuple, 7, PyBool_FromLong(device.has_execution_optimization));
PyTuple_SET_ITEM(ret, i, device_tuple);
}

View File

@@ -94,6 +94,10 @@ class DeviceInfo {
bool has_gpu_queue = false; /* Device supports GPU queue. */
bool use_hardware_raytracing = false; /* Use hardware instructions to accelerate ray tracing. */
bool use_metalrt_by_default = false; /* Use MetalRT by default. */
/* Indicate that device execution has been optimized by Blender or vendor developers.
* For LTS versions, this helps communicate that newer versions may have better performance. */
bool has_execution_optimization = true;
KernelOptimizationLevel kernel_optimization_level =
KERNEL_OPTIMIZATION_LEVEL_FULL; /* Optimization level applied to path tracing
* kernels (Metal only). */
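
As a sketch of the default described in the comment above (hypothetical stand-in types, not the real Cycles classes): the flag defaults to true, so backends that never touch it keep reporting every device as optimized, while a backend like oneAPI fills it per device.

#include <string>

/* Simplified stand-in for DeviceInfo: the flag defaults to true, so device
 * types that never touch it behave exactly as before this change. */
struct ExampleDeviceInfo {
  std::string description;
  bool has_execution_optimization = true;
};

/* Hypothetical enumeration in a backend that does not implement the check:
 * the default is kept, i.e. the device is reported as optimized. */
static ExampleDeviceInfo enumerate_legacy_device(const std::string &name)
{
  ExampleDeviceInfo info;
  info.description = name;
  return info;
}

/* Hypothetical oneAPI-style enumeration: the flag is filled from a
 * per-architecture lookup such as OneapiDevice::architecture_information(). */
static ExampleDeviceInfo enumerate_oneapi_device(const std::string &name, const bool arch_optimized)
{
  ExampleDeviceInfo info;
  info.description = name;
  info.has_execution_optimization = arch_optimized;
  return info;
}

int main()
{
  const ExampleDeviceInfo legacy = enumerate_legacy_device("Some legacy-path GPU");
  const ExampleDeviceInfo intel = enumerate_oneapi_device("Some new Intel GPU", false);
  return (legacy.has_execution_optimization && !intel.has_execution_optimization) ? 0 : 1;
}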

View File

@@ -99,6 +99,7 @@ static void device_iterator_cb(const char *id,
const int num,
bool hwrt_support,
bool oidn_support,
bool has_execution_optimization,
void *user_ptr)
{
vector<DeviceInfo> *devices = (vector<DeviceInfo> *)user_ptr;
@@ -140,6 +141,8 @@ static void device_iterator_cb(const char *id,
(void)hwrt_support;
# endif
info.has_execution_optimization = has_execution_optimization;
devices->push_back(info);
VLOG_INFO << "Added device \"" << info.description << "\" with id \"" << info.id << "\".";

View File

@@ -1454,6 +1454,64 @@ std::vector<sycl::device> available_sycl_devices()
return available_devices;
}
void OneapiDevice::architecture_information(const SyclDevice *device,
string &name,
bool &is_optimized)
{
const sycl::ext::oneapi::experimental::architecture arch =
reinterpret_cast<const sycl::device *>(device)
->get_info<sycl::ext::oneapi::experimental::info::device::architecture>();
# define FILL_ARCH_INFO(architecture_code, is_arch_optimised) \
case sycl::ext::oneapi::experimental::architecture ::architecture_code: \
name = #architecture_code; \
is_optimized = is_arch_optimised; \
break;
/* List of architectures that have been optimized by Intel and Blender developers.
*
* For example, Intel Rocket Lake iGPU (rkl) is not supported and not optimized,
* while Intel Arc Alchemist dGPU (dg2) was optimized for.
*
* Devices can be changed from unoptimized to optimized manually, after DPC++ has
* been upgraded to support the architecture and CYCLES_ONEAPI_INTEL_BINARIES_ARCH
* in CMake includes the architecture. */
switch (arch) {
FILL_ARCH_INFO(intel_gpu_bdw, false)
FILL_ARCH_INFO(intel_gpu_skl, false)
FILL_ARCH_INFO(intel_gpu_kbl, false)
FILL_ARCH_INFO(intel_gpu_cfl, false)
FILL_ARCH_INFO(intel_gpu_apl, false)
FILL_ARCH_INFO(intel_gpu_glk, false)
FILL_ARCH_INFO(intel_gpu_whl, false)
FILL_ARCH_INFO(intel_gpu_aml, false)
FILL_ARCH_INFO(intel_gpu_cml, false)
FILL_ARCH_INFO(intel_gpu_icllp, false)
FILL_ARCH_INFO(intel_gpu_ehl, false)
FILL_ARCH_INFO(intel_gpu_tgllp, false)
FILL_ARCH_INFO(intel_gpu_rkl, false)
FILL_ARCH_INFO(intel_gpu_adl_s, false)
FILL_ARCH_INFO(intel_gpu_adl_p, false)
FILL_ARCH_INFO(intel_gpu_adl_n, false)
FILL_ARCH_INFO(intel_gpu_dg1, false)
FILL_ARCH_INFO(intel_gpu_dg2_g10, true)
FILL_ARCH_INFO(intel_gpu_dg2_g11, true)
FILL_ARCH_INFO(intel_gpu_dg2_g12, true)
FILL_ARCH_INFO(intel_gpu_pvc, false)
FILL_ARCH_INFO(intel_gpu_pvc_vg, false)
/* intel_gpu_mtl_u == intel_gpu_mtl_s == intel_gpu_arl_u == intel_gpu_arl_s */
FILL_ARCH_INFO(intel_gpu_mtl_u, true)
FILL_ARCH_INFO(intel_gpu_mtl_h, true)
FILL_ARCH_INFO(intel_gpu_bmg_g21, true)
FILL_ARCH_INFO(intel_gpu_lnl_m, true)
default:
name = "unknown";
is_optimized = false;
break;
}
}
char *OneapiDevice::device_capabilities()
{
std::stringstream capabilities;
@@ -1470,6 +1528,15 @@ char *OneapiDevice::device_capabilities()
capabilities << "\t\tsycl::info::platform::name\t\t\t"
<< device.get_platform().get_info<sycl::info::platform::name>() << "\n";
string arch_name;
bool is_optimised_for_arch;
architecture_information(
reinterpret_cast<const SyclDevice *>(&device), arch_name, is_optimised_for_arch);
capabilities << "\t\tsycl::info::device::architecture\t\t\t";
capabilities << arch_name << "\n";
capabilities << "\t\tsycl::info::device::is_cycles_optimized\t\t\t";
capabilities << is_optimised_for_arch << "\n";
# define WRITE_ATTR(attribute_name, attribute_variable) \
capabilities << "\t\tsycl::info::device::" #attribute_name "\t\t\t" << attribute_variable \
<< "\n";
@@ -1587,10 +1654,22 @@ void OneapiDevice::iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_p
bool oidn_support = false;
# endif
std::string id = "ONEAPI_" + platform_name + "_" + name;
string arch_name;
bool is_optimised_for_arch;
architecture_information(
reinterpret_cast<const SyclDevice *>(&device), arch_name, is_optimised_for_arch);
if (device.has(sycl::aspect::ext_intel_pci_address)) {
id.append("_" + device.get_info<sycl::ext::intel::info::device::pci_address>());
}
(cb)(id.c_str(), name.c_str(), num, hwrt_support, oidn_support, user_ptr);
(cb)(id.c_str(),
name.c_str(),
num,
hwrt_support,
oidn_support,
is_optimised_for_arch,
user_ptr);
num++;
}
}
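
To tie this file's changes together, a small self-contained sketch (hypothetical names, not the actual implementation): a per-architecture lookup decides whether a device was optimized for, and the result is forwarded through the widened iterator callback, ending up in DeviceInfo::has_execution_optimization. Marking a newly supported architecture as optimized means flipping its entry in the lookup once AoT binaries are built for it (see CYCLES_ONEAPI_INTEL_BINARIES_ARCH above).

#include <cstdio>
#include <string>

/* Same shape as the widened OneAPIDeviceIteratorCallback: one extra bool for
 * the optimization status. */
using ExampleIteratorCallback = void (*)(const char *id,
                                         const char *name,
                                         int num,
                                         bool hwrt_support,
                                         bool oidn_support,
                                         bool has_execution_optimization,
                                         void *user_ptr);

/* Hypothetical lookup standing in for OneapiDevice::architecture_information():
 * an architecture's entry is switched to true once it has been optimized for. */
static bool architecture_is_optimized(const std::string &arch_name)
{
  return arch_name == "intel_gpu_dg2_g10" || arch_name == "intel_gpu_bmg_g21";
}

static void print_device(const char *id,
                         const char *name,
                         int /*num*/,
                         bool /*hwrt_support*/,
                         bool /*oidn_support*/,
                         bool has_execution_optimization,
                         void * /*user_ptr*/)
{
  std::printf("%s (%s): %s\n", name, id,
              has_execution_optimization ? "optimized" : "unoptimized");
}

int main()
{
  const ExampleIteratorCallback cb = print_device;
  cb("ONEAPI_EXAMPLE_ID", "Example Intel GPU", 0, true, true,
     architecture_is_optimized("intel_gpu_dg2_g10"), nullptr);
}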

View File

@@ -16,7 +16,7 @@ CCL_NAMESPACE_BEGIN
class DeviceQueue;
using OneAPIDeviceIteratorCallback =
void (*)(const char *, const char *, const int, bool, bool, void *);
void (*)(const char *, const char *, const int, bool, bool, bool, void *);
class OneapiDevice : public GPUDevice {
private:
@@ -117,6 +117,7 @@ class OneapiDevice : public GPUDevice {
void *usm_aligned_alloc_host(const size_t memory_size, const size_t alignment);
void usm_free(void *usm_ptr);
static void architecture_information(const SyclDevice *device, string &name, bool &is_optimized);
static char *device_capabilities();
static void iterate_devices(OneAPIDeviceIteratorCallback cb, void *user_ptr);

View File

@@ -25,6 +25,7 @@ enum DeviceKernel : int;
# endif
class SyclQueue;
class SyclDevice;
typedef void (*OneAPIErrorCallback)(const char *error, void *user_ptr);