Cycles: Pack uint3/int3 structs for oneAPI

The int3 layout recently changed after a fix in 28f93d5443,
but we get better performance by ensuring int3 is packed instead.

Packing int3 currently gives a 7% speedup when rendering wdas_cloud on
Intel Arc B580.

Pull Request: https://projects.blender.org/blender/blender/pulls/145593
This commit is contained in:
Xavier Hallade
2025-09-08 09:22:32 +02:00
committed by Xavier Hallade
parent 75bca47553
commit aeb103fb50
2 changed files with 9 additions and 4 deletions

View File

@@ -9,7 +9,12 @@
CCL_NAMESPACE_BEGIN
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
# ifdef __KERNEL_ONEAPI__
/* Keep structure packed for oneAPI. */
struct int3
# else
struct ccl_try_align(16) int3
# endif
{
# ifdef __KERNEL_GPU__
/* Compact structure on the GPU. */
@@ -98,8 +103,8 @@ ccl_device_inline void print_int3(const ccl_private char *label, const int3 a)
#if defined(__KERNEL_METAL__)
/* Metal has native packed_int3. */
#elif defined(__KERNEL_CUDA__)
/* CUDA is already packed. */
#elif defined(__KERNEL_CUDA__) || defined(__KERNEL_ONEAPI__)
/* CUDA/oneAPI int3 is already packed. */
typedef int3 packed_int3;
#else
/* HIP int3 is not packed (https://github.com/ROCm-Developer-Tools/HIP/issues/706). */

View File

@@ -36,8 +36,8 @@ ccl_device_inline uint3 make_uint3(const uint x, const uint y, uint z)
#if defined(__KERNEL_METAL__)
/* Metal has native packed_uint3. */
#elif defined(__KERNEL_CUDA__)
/* CUDA uint3 is already packed. */
#elif defined(__KERNEL_CUDA__) || defined(__KERNEL_ONEAPI__)
/* CUDA/oneAPI uint3 is already packed. */
using packed_uint3 = uint3;
#else
/* HIP uint3 is not packed (https://github.com/ROCm-Developer-Tools/HIP/issues/706). */