Cycles: Pack uint3/int3 structs for oneAPI
The struct packing recently changed after a fix in 28f93d5443,
but we get better performance by ensuring int3 is packed instead.
Packing int3 currently gives a 7% speedup when rendering wdas_cloud on
Intel Arc B580.
Pull Request: https://projects.blender.org/blender/blender/pulls/145593
This commit is contained in:
committed by
Xavier Hallade
parent
75bca47553
commit
aeb103fb50
@@ -9,7 +9,12 @@
|
||||
CCL_NAMESPACE_BEGIN
|
||||
|
||||
#ifndef __KERNEL_NATIVE_VECTOR_TYPES__
|
||||
# ifdef __KERNEL_ONEAPI__
|
||||
/* Keep structure packed for oneAPI. */
|
||||
struct int3
|
||||
# else
|
||||
struct ccl_try_align(16) int3
|
||||
# endif
|
||||
{
|
||||
# ifdef __KERNEL_GPU__
|
||||
/* Compact structure on the GPU. */
|
||||
@@ -98,8 +103,8 @@ ccl_device_inline void print_int3(const ccl_private char *label, const int3 a)
|
||||
|
||||
#if defined(__KERNEL_METAL__)
|
||||
/* Metal has native packed_int3. */
|
||||
#elif defined(__KERNEL_CUDA__)
|
||||
/* CUDA is already packed. */
|
||||
#elif defined(__KERNEL_CUDA__) || defined(__KERNEL_ONEAPI__)
|
||||
/* CUDA/oneAPI int3 is already packed. */
|
||||
typedef int3 packed_int3;
|
||||
#else
|
||||
/* HIP int3 is not packed (https://github.com/ROCm-Developer-Tools/HIP/issues/706). */
|
||||
|
||||
@@ -36,8 +36,8 @@ ccl_device_inline uint3 make_uint3(const uint x, const uint y, uint z)
|
||||
|
||||
#if defined(__KERNEL_METAL__)
|
||||
/* Metal has native packed_uint3. */
|
||||
#elif defined(__KERNEL_CUDA__)
|
||||
/* CUDA uint3 is already packed. */
|
||||
#elif defined(__KERNEL_CUDA__) || defined(__KERNEL_ONEAPI__)
|
||||
/* CUDA/oneAPI uint3 is already packed. */
|
||||
using packed_uint3 = uint3;
|
||||
#else
|
||||
/* HIP uint3 is not packed (https://github.com/ROCm-Developer-Tools/HIP/issues/706). */
|
||||
|
||||
Reference in New Issue
Block a user