diff --git a/build_files/config/pipeline_config.yaml b/build_files/config/pipeline_config.yaml index ad9986d9d0c..16d47f6d534 100644 --- a/build_files/config/pipeline_config.yaml +++ b/build_files/config/pipeline_config.yaml @@ -20,7 +20,7 @@ buildbot: optix: version: '7.4.0' ocloc: - version: '101.5972' + version: '101.6557' cmake: default: version: any diff --git a/intern/cycles/blender/addon/properties.py b/intern/cycles/blender/addon/properties.py index 35379648a3b..3951f0a27a2 100644 --- a/intern/cycles/blender/addon/properties.py +++ b/intern/cycles/blender/addon/properties.py @@ -1787,7 +1787,7 @@ class CyclesPreferences(bpy.types.AddonPreferences): elif device_type == 'ONEAPI': import sys if sys.platform.startswith("win"): - driver_version = "XX.X.101.5730" + driver_version = "XX.X.101.6557" col.label(text=rpt_("Requires Intel GPU with Xe-HPG architecture"), icon='BLANK1', translate=False) col.label(text=rpt_("and Windows driver version %s or newer") % driver_version, icon='BLANK1', translate=False) diff --git a/intern/cycles/device/oneapi/device_impl.cpp b/intern/cycles/device/oneapi/device_impl.cpp index f36473ac0e4..422ee62c54d 100644 --- a/intern/cycles/device/oneapi/device_impl.cpp +++ b/intern/cycles/device/oneapi/device_impl.cpp @@ -57,9 +57,12 @@ OneapiDevice::OneapiDevice(const DeviceInfo &info, Stats &stats, Profiler &profi kg_memory_size_(0) { /* Verify that base class types can be used with specific backend types */ - static_assert(sizeof(texMemObject) == sizeof(void *)); - static_assert(sizeof(arrayMemObject) == sizeof(void *)); + static_assert(sizeof(texMemObject) == + sizeof(sycl::ext::oneapi::experimental::sampled_image_handle)); + static_assert(sizeof(arrayMemObject) == + sizeof(sycl::ext::oneapi::experimental::image_mem_handle)); + need_texture_info = false; use_hardware_raytracing = info.use_hardware_raytracing; oneapi_set_error_cb(queue_error_cb, &oneapi_error_string_); @@ -636,23 +639,188 @@ void OneapiDevice::global_free(device_memory &mem) } } +static sycl::ext::oneapi::experimental::image_descriptor image_desc(const device_texture &mem) +{ + /* Image Texture Storage */ + sycl::image_channel_type channel_type; + + switch (mem.data_type) { + case TYPE_UCHAR: + channel_type = sycl::image_channel_type::unorm_int8; + break; + case TYPE_UINT16: + channel_type = sycl::image_channel_type::unorm_int16; + break; + case TYPE_FLOAT: + channel_type = sycl::image_channel_type::fp32; + break; + case TYPE_HALF: + channel_type = sycl::image_channel_type::fp16; + break; + default: + assert(0); + } + + sycl::ext::oneapi::experimental::image_descriptor param; + param.width = mem.data_width; + param.height = mem.data_height; + param.depth = mem.data_depth == 1 ? 0 : mem.data_depth; + param.num_channels = mem.data_elements; + param.channel_type = channel_type; + + param.verify(); + + return param; +} + void OneapiDevice::tex_alloc(device_texture &mem) { - generic_alloc(mem); - generic_copy_to(mem); + assert(device_queue_); - { - /* Update texture info. */ - thread_scoped_lock lock(texture_info_mutex); - const uint slot = mem.slot; - if (slot >= texture_info.size()) { - /* Allocate some slots in advance, to reduce amount of re-allocations. */ - texture_info.resize(slot + 128); + size_t size = mem.memory_size(); + + sycl::addressing_mode address_mode = sycl::addressing_mode::none; + switch (mem.info.extension) { + case EXTENSION_REPEAT: + address_mode = sycl::addressing_mode::repeat; + break; + case EXTENSION_EXTEND: + address_mode = sycl::addressing_mode::clamp_to_edge; + break; + case EXTENSION_CLIP: + address_mode = sycl::addressing_mode::clamp; + break; + case EXTENSION_MIRROR: + address_mode = sycl::addressing_mode::mirrored_repeat; + break; + default: + assert(0); + break; + } + + sycl::filtering_mode filter_mode; + if (mem.info.interpolation == INTERPOLATION_CLOSEST) { + filter_mode = sycl::filtering_mode::nearest; + } + else { + filter_mode = sycl::filtering_mode::linear; + } + + /* Image Texture Storage */ + sycl::image_channel_type channel_type; + + switch (mem.data_type) { + case TYPE_UCHAR: + channel_type = sycl::image_channel_type::unorm_int8; + break; + case TYPE_UINT16: + channel_type = sycl::image_channel_type::unorm_int16; + break; + case TYPE_FLOAT: + channel_type = sycl::image_channel_type::fp32; + break; + case TYPE_HALF: + channel_type = sycl::image_channel_type::fp16; + break; + default: + assert(0); + return; + } + + sycl::queue *queue = reinterpret_cast(device_queue_); + + try { + Mem *cmem = nullptr; + sycl::ext::oneapi::experimental::image_mem_handle memHandle{0}; + sycl::ext::oneapi::experimental::image_descriptor desc{}; + + if (mem.data_height > 0) { + /* 2D/3D texture -- Tile optimized */ + size_t depth = mem.data_depth == 1 ? 0 : mem.data_depth; + desc = sycl::ext::oneapi::experimental::image_descriptor( + {mem.data_width, mem.data_height, depth}, mem.data_elements, channel_type); + + VLOG_WORK << "Array 2D/3D allocate: " << mem.name << ", " + << string_human_readable_number(mem.memory_size()) << " bytes. (" + << string_human_readable_size(mem.memory_size()) << ")"; + + sycl::ext::oneapi::experimental::image_mem_handle memHandle = + sycl::ext::oneapi::experimental::alloc_image_mem(desc, *queue); + + /* Copy data from host to the texture properly based on the texture description */ + queue->ext_oneapi_copy(mem.host_pointer, memHandle, desc); + + mem.device_pointer = (device_ptr)memHandle.raw_handle; + mem.device_size = size; + stats.mem_alloc(size); + + thread_scoped_lock lock(device_mem_map_mutex); + cmem = &device_mem_map[&mem]; + cmem->texobject = 0; + cmem->array = (arrayMemObject)(memHandle.raw_handle); } + else { + /* 1D texture -- Linear memory */ + desc = sycl::ext::oneapi::experimental::image_descriptor( + {mem.data_width}, mem.data_elements, channel_type); + cmem = generic_alloc(mem); + if (!cmem) { + return; + } + + queue->memcpy((void *)mem.device_pointer, mem.host_pointer, size); + } + + queue->wait_and_throw(); + + /* Set Mapping and tag that we need to (re-)upload to device */ TextureInfo tex_info = mem.info; - tex_info.data = (uint64_t)mem.device_pointer; - texture_info[slot] = tex_info; - need_texture_info = true; + + sycl::ext::oneapi::experimental::bindless_image_sampler samp( + address_mode, sycl::coordinate_normalization_mode::normalized, filter_mode); + + if (mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT && + mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FLOAT3 && + mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FPN && + mem.info.data_type != IMAGE_DATA_TYPE_NANOVDB_FP16) + { + sycl::ext::oneapi::experimental::sampled_image_handle imgHandle; + + if (memHandle.raw_handle) { + /* Create 2D/3D texture handle */ + imgHandle = sycl::ext::oneapi::experimental::create_image(memHandle, samp, desc, *queue); + } + else { + /* Create 1D texture */ + imgHandle = sycl::ext::oneapi::experimental::create_image( + (void *)mem.device_pointer, 0, samp, desc, *queue); + } + + thread_scoped_lock lock(device_mem_map_mutex); + cmem = &device_mem_map[&mem]; + cmem->texobject = (texMemObject)(imgHandle.raw_handle); + + tex_info.data = (uint64_t)cmem->texobject; + } + else { + tex_info.data = (uint64_t)mem.device_pointer; + } + + { + /* Update texture info. */ + thread_scoped_lock lock(texture_info_mutex); + const uint slot = mem.slot; + if (slot >= texture_info.size()) { + /* Allocate some slots in advance, to reduce amount of re-allocations. */ + texture_info.resize(slot + 128); + } + texture_info[slot] = tex_info; + need_texture_info = true; + } + } + catch (sycl::exception const &e) { + set_error("oneAPI texture allocation error: got runtime exception \"" + string(e.what()) + + "\""); } } @@ -662,15 +830,73 @@ void OneapiDevice::tex_copy_to(device_texture &mem) tex_alloc(mem); } else { - generic_copy_to(mem); + if (mem.data_height > 0) { + /* 2D/3D texture -- Tile optimized */ + sycl::ext::oneapi::experimental::image_descriptor desc = image_desc(mem); + + sycl::queue *queue = reinterpret_cast(device_queue_); + + try { + /* Copy data from host to the texture properly based on the texture description */ + thread_scoped_lock lock(device_mem_map_mutex); + const Mem &cmem = device_mem_map[&mem]; + sycl::ext::oneapi::experimental::image_mem_handle image_handle{ + (sycl::ext::oneapi::experimental::image_mem_handle::raw_handle_type)cmem.array}; + queue->ext_oneapi_copy(mem.host_pointer, image_handle, desc); + +# ifdef WITH_CYCLES_DEBUG + queue->wait_and_throw(); +# endif + } + catch (sycl::exception const &e) { + set_error("oneAPI texture copy error: got runtime exception \"" + string(e.what()) + "\""); + } + } + else { + generic_copy_to(mem); + } } } void OneapiDevice::tex_free(device_texture &mem) { - /* There is no texture memory in SYCL. */ if (mem.device_pointer) { - generic_free(mem); + thread_scoped_lock lock(device_mem_map_mutex); + DCHECK(device_mem_map.find(&mem) != device_mem_map.end()); + const Mem &cmem = device_mem_map[&mem]; + + sycl::queue *queue = reinterpret_cast(device_queue_); + + if (cmem.texobject) { + /* Free bindless texture itself. */ + sycl::ext::oneapi::experimental::sampled_image_handle image(cmem.texobject); + sycl::ext::oneapi::experimental::destroy_image_handle(image, *queue); + } + + if (cmem.array) { + /* Free texture memory. */ + sycl::ext::oneapi::experimental::image_mem_handle imgHandle{ + (sycl::ext::oneapi::experimental::image_mem_handle::raw_handle_type)cmem.array}; + + try { + /* We have allocated only standard textures, so we also dellocate only them. */ + sycl::ext::oneapi::experimental::free_image_mem( + imgHandle, sycl::ext::oneapi::experimental::image_type::standard, *queue); + } + catch (sycl::exception const &e) { + set_error("oneAPI texture deallocation error: got runtime exception \"" + + string(e.what()) + "\""); + } + + stats.mem_free(mem.memory_size()); + mem.device_pointer = 0; + mem.device_size = 0; + device_mem_map.erase(device_mem_map.find(&mem)); + } + else { + lock.unlock(); + generic_free(mem); + } } } @@ -1061,11 +1287,11 @@ void OneapiDevice::get_adjusted_global_and_local_sizes(SyclQueue *queue, /* Compute-runtime (ie. NEO) version is what gets returned by sycl/L0 on Windows * since Windows driver 101.3268. */ -static const int lowest_supported_driver_version_win = 1015730; +static const int lowest_supported_driver_version_win = 1016554; # ifdef _WIN32 -/* For Windows driver 101.5730, compute-runtime version is 29550. +/* For Windows driver 101.6557, compute-runtime version is 31896. * This information is returned by `ocloc query OCL_DRIVER_VERSION`.*/ -static const int lowest_supported_driver_version_neo = 29550; +static const int lowest_supported_driver_version_neo = 31896; # else static const int lowest_supported_driver_version_neo = 31740; # endif diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt index 56d9cda484c..25dd081e620 100644 --- a/intern/cycles/kernel/CMakeLists.txt +++ b/intern/cycles/kernel/CMakeLists.txt @@ -134,7 +134,6 @@ set(SRC_KERNEL_DEVICE_ONEAPI_HEADERS device/oneapi/context_intersect_begin.h device/oneapi/context_intersect_end.h device/oneapi/globals.h - device/oneapi/image.h device/oneapi/kernel.h device/oneapi/kernel_templates.h device/cpu/bvh.h diff --git a/intern/cycles/kernel/device/oneapi/compat.h b/intern/cycles/kernel/device/oneapi/compat.h index 630c01f1b40..e2733ae8cb2 100644 --- a/intern/cycles/kernel/device/oneapi/compat.h +++ b/intern/cycles/kernel/device/oneapi/compat.h @@ -236,3 +236,78 @@ ccl_device_forceinline int __float_as_int(const float x) /* Types */ #include "util/half.h" #include "util/types.h" + +static_assert( + sizeof(sycl::ext::oneapi::experimental::sampled_image_handle::raw_image_handle_type) == + sizeof(uint64_t)); +typedef uint64_t ccl_gpu_tex_object_2D; +typedef uint64_t ccl_gpu_tex_object_3D; + +template +ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D texobj, + const float x, + const float y) +{ + /* Generic implementation not possible due to limitation with SYCL bindless sampled images + * not being able to read in a format, which is different from the supported data type of + * the texture. + * But looks it looks like this is not a problem at the moment. */ + static_assert(false); + return T(); +} + +template<> +ccl_device_forceinline float ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D texobj, + const float x, + const float y) +{ + sycl::ext::oneapi::experimental::sampled_image_handle image( + (sycl::ext::oneapi::experimental::sampled_image_handle::raw_image_handle_type)texobj); + return sycl::ext::oneapi::experimental::sample_image(image, sycl::float2{x, y}); +} + +template<> +ccl_device_forceinline float4 ccl_gpu_tex_object_read_2D( + const ccl_gpu_tex_object_2D texobj, const float x, const float y) +{ + sycl::ext::oneapi::experimental::sampled_image_handle image( + (sycl::ext::oneapi::experimental::sampled_image_handle::raw_image_handle_type)texobj); + return sycl::ext::oneapi::experimental::sample_image>( + image, sycl::float2{x, y}); +} + +template +ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D texobj, + const float x, + const float y, + const float z) +{ + /* A generic implementation is not possible due to limitations with SYCL bindless sampled images + * not being able to read in a format that is different from the supported data type of + * the texture. + * However, it looks like this is not a problem at the moment, but I am leaving a static + * assert in order to easily detect if it becomes a problem in the future. */ + static_assert(false); + return T(); +} + +template<> +ccl_device_forceinline float ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D texobj, + const float x, + const float y, + const float z) +{ + sycl::ext::oneapi::experimental::sampled_image_handle image( + (sycl::ext::oneapi::experimental::sampled_image_handle::raw_image_handle_type)texobj); + return sycl::ext::oneapi::experimental::sample_image(image, sycl::float3{x, y, z}); +} + +template<> +ccl_device_forceinline float4 ccl_gpu_tex_object_read_3D( + const ccl_gpu_tex_object_3D texobj, const float x, const float y, const float z) +{ + sycl::ext::oneapi::experimental::sampled_image_handle image( + (sycl::ext::oneapi::experimental::sampled_image_handle::raw_image_handle_type)texobj); + return sycl::ext::oneapi::experimental::sample_image>( + image, sycl::float3{x, y, z}); +} diff --git a/intern/cycles/kernel/device/oneapi/context_begin.h b/intern/cycles/kernel/device/oneapi/context_begin.h index 8f705983567..96898ad395b 100644 --- a/intern/cycles/kernel/device/oneapi/context_begin.h +++ b/intern/cycles/kernel/device/oneapi/context_begin.h @@ -7,5 +7,5 @@ /* clang-format off */ struct ONEAPIKernelContext : public KernelGlobalsGPU { public: -# include "kernel/device/oneapi/image.h" +# include "kernel/device/gpu/image.h" /* clang-format on */ diff --git a/intern/cycles/kernel/device/oneapi/image.h b/intern/cycles/kernel/device/oneapi/image.h deleted file mode 100644 index e4fbaf5b50a..00000000000 --- a/intern/cycles/kernel/device/oneapi/image.h +++ /dev/null @@ -1,433 +0,0 @@ -/* SPDX-FileCopyrightText: 2021-2022 Intel Corporation - * - * SPDX-License-Identifier: Apache-2.0 */ - -CCL_NAMESPACE_BEGIN - -/* For oneAPI implementation we do manual lookup and interpolation. */ -/* TODO: share implementation with ../cpu/image.h. */ - -template ccl_device_forceinline T tex_fetch(const TextureInfo &info, const int index) -{ - return reinterpret_cast(info.data)[index]; -} - -ccl_device_inline int svm_image_texture_wrap_periodic(int x, int width) -{ - x %= width; - if (x < 0) { - x += width; - } - return x; -} - -ccl_device_inline int svm_image_texture_wrap_clamp(const int x, const int width) -{ - return clamp(x, 0, width - 1); -} - -ccl_device_inline int svm_image_texture_wrap_mirror(const int x, const int width) -{ - const int m = abs(x + (x < 0)) % (2 * width); - if (m >= width) { - return 2 * width - m - 1; - } - return m; -} - -ccl_device_inline float4 svm_image_texture_read(const TextureInfo &info, - const int x, - int y, - const int z) -{ - const int data_offset = x + info.width * y + info.width * info.height * z; - const int texture_type = info.data_type; - - /* Float4 */ - if (texture_type == IMAGE_DATA_TYPE_FLOAT4) { - return tex_fetch(info, data_offset); - } - /* Byte4 */ - if (texture_type == IMAGE_DATA_TYPE_BYTE4) { - uchar4 r = tex_fetch(info, data_offset); - float f = 1.0f / 255.0f; - return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); - } - /* Ushort4 */ - if (texture_type == IMAGE_DATA_TYPE_USHORT4) { - ushort4 r = tex_fetch(info, data_offset); - float f = 1.0f / 65535.f; - return make_float4(r.x * f, r.y * f, r.z * f, r.w * f); - } - /* Float */ - if (texture_type == IMAGE_DATA_TYPE_FLOAT) { - float f = tex_fetch(info, data_offset); - return make_float4(f, f, f, 1.0f); - } - /* UShort */ - if (texture_type == IMAGE_DATA_TYPE_USHORT) { - ushort r = tex_fetch(info, data_offset); - float f = r * (1.0f / 65535.0f); - return make_float4(f, f, f, 1.0f); - } - if (texture_type == IMAGE_DATA_TYPE_HALF) { - float f = tex_fetch(info, data_offset); - return make_float4(f, f, f, 1.0f); - } - if (texture_type == IMAGE_DATA_TYPE_HALF4) { - half4 r = tex_fetch(info, data_offset); - return make_float4(r.x, r.y, r.z, r.w); - } - /* Byte */ - uchar r = tex_fetch(info, data_offset); - float f = r * (1.0f / 255.0f); - return make_float4(f, f, f, 1.0f); -} - -ccl_device_inline float4 svm_image_texture_read_2d(const int id, int x, int y) -{ - const TextureInfo &info = kernel_data_fetch(texture_info, id); - - /* Wrap */ - if (info.extension == EXTENSION_REPEAT) { - x = svm_image_texture_wrap_periodic(x, info.width); - y = svm_image_texture_wrap_periodic(y, info.height); - } - else if (info.extension == EXTENSION_EXTEND) { - x = svm_image_texture_wrap_clamp(x, info.width); - y = svm_image_texture_wrap_clamp(y, info.height); - } - else if (info.extension == EXTENSION_MIRROR) { - x = svm_image_texture_wrap_mirror(x, info.width); - y = svm_image_texture_wrap_mirror(y, info.height); - } - else { - if (x < 0 || x >= info.width || y < 0 || y >= info.height) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } - - return svm_image_texture_read(info, x, y, 0); -} - -ccl_device_inline float4 svm_image_texture_read_3d(const int id, int x, int y, int z) -{ - const TextureInfo &info = kernel_data_fetch(texture_info, id); - - /* Wrap */ - if (info.extension == EXTENSION_REPEAT) { - x = svm_image_texture_wrap_periodic(x, info.width); - y = svm_image_texture_wrap_periodic(y, info.height); - z = svm_image_texture_wrap_periodic(z, info.depth); - } - else if (info.extension == EXTENSION_EXTEND) { - x = svm_image_texture_wrap_clamp(x, info.width); - y = svm_image_texture_wrap_clamp(y, info.height); - z = svm_image_texture_wrap_clamp(z, info.depth); - } - else if (info.extension == EXTENSION_MIRROR) { - x = svm_image_texture_wrap_mirror(x, info.width); - y = svm_image_texture_wrap_mirror(y, info.height); - z = svm_image_texture_wrap_mirror(z, info.depth); - } - else { - if (x < 0 || x >= info.width || y < 0 || y >= info.height || z < 0 || z >= info.depth) { - return make_float4(0.0f, 0.0f, 0.0f, 0.0f); - } - } - - return svm_image_texture_read(info, x, y, z); -} - -static float svm_image_texture_frac(const float x, int *ix) -{ - int i = float_to_int(x) - ((x < 0.0f) ? 1 : 0); - *ix = i; - return x - (float)i; -} - -#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \ - { \ - u[0] = (((-1.0f / 6.0f) * t + 0.5f) * t - 0.5f) * t + (1.0f / 6.0f); \ - u[1] = ((0.5f * t - 1.0f) * t) * t + (2.0f / 3.0f); \ - u[2] = ((-0.5f * t + 0.5f) * t + 0.5f) * t + (1.0f / 6.0f); \ - u[3] = (1.0f / 6.0f) * t * t * t; \ - } \ - (void)0 - -ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, const int id, float x, float y) -{ - const TextureInfo &info = kernel_data_fetch(texture_info, id); - - if (info.interpolation == INTERPOLATION_CLOSEST) { - /* Closest interpolation. */ - int ix, iy; - svm_image_texture_frac(x * info.width, &ix); - svm_image_texture_frac(y * info.height, &iy); - - return svm_image_texture_read_2d(id, ix, iy); - } - if (info.interpolation == INTERPOLATION_LINEAR) { - /* Bilinear interpolation. */ - int ix, iy; - float tx = svm_image_texture_frac(x * info.width - 0.5f, &ix); - float ty = svm_image_texture_frac(y * info.height - 0.5f, &iy); - - float4 r; - r = (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_2d(id, ix, iy); - r += (1.0f - ty) * tx * svm_image_texture_read_2d(id, ix + 1, iy); - r += ty * (1.0f - tx) * svm_image_texture_read_2d(id, ix, iy + 1); - r += ty * tx * svm_image_texture_read_2d(id, ix + 1, iy + 1); - return r; - } - /* Bicubic interpolation. */ - int ix, iy; - float tx = svm_image_texture_frac(x * info.width - 0.5f, &ix); - float ty = svm_image_texture_frac(y * info.height - 0.5f, &iy); - - float u[4], v[4]; - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - - float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - for (int y = 0; y < 4; y++) { - for (int x = 0; x < 4; x++) { - float weight = u[x] * v[y]; - r += weight * svm_image_texture_read_2d(id, ix + x - 1, iy + y - 1); - } - } - return r; -} - -#ifdef WITH_NANOVDB -template struct NanoVDBInterpolator { - - static ccl_always_inline float read(const float r) - { - return r; - } - - static ccl_always_inline float4 read(const packed_float3 r) - { - return make_float4(r.x, r.y, r.z, 1.0f); - } - - template - static ccl_always_inline OutT - interp_3d_closest(const Acc &acc, const float x, float y, const float z) - { - const nanovdb::Coord coord(int32_t(rintf(x)), int32_t(rintf(y)), int32_t(rintf(z))); - return read(acc.getValue(coord)); - } - - template - static ccl_always_inline OutT - interp_3d_linear(const Acc &acc, const float x, float y, const float z) - { - int ix, iy, iz; - const float tx = svm_image_texture_frac(x - 0.5f, &ix); - const float ty = svm_image_texture_frac(y - 0.5f, &iy); - const float tz = svm_image_texture_frac(z - 0.5f, &iz); - - return mix(mix(mix(read(acc.getValue(nanovdb::Coord(ix, iy, iz))), - read(acc.getValue(nanovdb::Coord(ix, iy, iz + 1))), - tz), - mix(read(acc.getValue(nanovdb::Coord(ix, iy + 1, iz + 1))), - read(acc.getValue(nanovdb::Coord(ix, iy + 1, iz))), - 1.0f - tz), - ty), - mix(mix(read(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz))), - read(acc.getValue(nanovdb::Coord(ix + 1, iy + 1, iz + 1))), - tz), - mix(read(acc.getValue(nanovdb::Coord(ix + 1, iy, iz + 1))), - read(acc.getValue(nanovdb::Coord(ix + 1, iy, iz))), - 1.0f - tz), - 1.0f - ty), - tx); - } - - /* Tricubic b-spline interpolation. */ - template - static ccl_always_inline OutT - interp_3d_cubic(const Acc &acc, const float x, float y, const float z) - { - int ix, iy, iz; - int nix, niy, niz; - int pix, piy, piz; - int nnix, nniy, nniz; - - /* A -0.5 offset is used to center the cubic samples around the sample point. */ - const float tx = svm_image_texture_frac(x - 0.5f, &ix); - const float ty = svm_image_texture_frac(y - 0.5f, &iy); - const float tz = svm_image_texture_frac(z - 0.5f, &iz); - - pix = ix - 1; - piy = iy - 1; - piz = iz - 1; - nix = ix + 1; - niy = iy + 1; - niz = iz + 1; - nnix = ix + 2; - nniy = iy + 2; - nniz = iz + 2; - - const int xc[4] = {pix, ix, nix, nnix}; - const int yc[4] = {piy, iy, niy, nniy}; - const int zc[4] = {piz, iz, niz, nniz}; - float u[4], v[4], w[4]; - - /* Some helper macros to keep code size reasonable. - * Lets the compiler inline all the matrix multiplications. - */ -# define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z])))) -# define COL_TERM(col, row) \ - (v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \ - u[3] * DATA(3, col, row))) -# define ROW_TERM(row) \ - (w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row))) - - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); - - /* Actual interpolation. */ - return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3); - -# undef COL_TERM -# undef ROW_TERM -# undef DATA - } - - static ccl_always_inline OutT - interp_3d(const TextureInfo &info, const float x, float y, const float z, const int interp) - { - using namespace nanovdb; - - NanoGrid *const grid = (NanoGrid *)info.data; - - switch (interp) { - case INTERPOLATION_CLOSEST: { - ReadAccessor acc(grid->tree().root()); - return interp_3d_closest(acc, x, y, z); - } - case INTERPOLATION_LINEAR: { - CachedReadAccessor acc(grid->tree().root()); - return interp_3d_linear(acc, x, y, z); - } - default: { - CachedReadAccessor acc(grid->tree().root()); - return interp_3d_cubic(acc, x, y, z); - } - } - } -}; -#endif /* WITH_NANOVDB */ - -ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg, - const int id, - float3 P, - const int interp) -{ - const TextureInfo &info = kernel_data_fetch(texture_info, id); - - if (info.use_transform_3d) { - Transform tfm = info.transform_3d; - P = transform_point(&tfm, P); - } - - float x = P.x; - float y = P.y; - float z = P.z; - - uint interpolation = (interp == INTERPOLATION_NONE) ? info.interpolation : interp; - -#ifdef WITH_NANOVDB - if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT) { - const float f = NanoVDBInterpolator::interp_3d(info, x, y, z, interpolation); - return make_float4(f, f, f, 1.0f); - } - if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3) { - return NanoVDBInterpolator::interp_3d(info, x, y, z, interpolation); - } - if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FPN) { - const float f = NanoVDBInterpolator::interp_3d( - info, x, y, z, interpolation); - return make_float4(f, f, f, 1.0f); - } - if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FP16) { - const float f = NanoVDBInterpolator::interp_3d( - info, x, y, z, interpolation); - return make_float4(f, f, f, 1.0f); - } -#else - if (info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT || - info.data_type == IMAGE_DATA_TYPE_NANOVDB_FLOAT3 || - info.data_type == IMAGE_DATA_TYPE_NANOVDB_FPN || - info.data_type == IMAGE_DATA_TYPE_NANOVDB_FP16) - { - return make_float4( - TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A); - } -#endif - else { - x *= info.width; - y *= info.height; - z *= info.depth; - } - - if (interpolation == INTERPOLATION_CLOSEST) { - /* Closest interpolation. */ - int ix, iy, iz; - svm_image_texture_frac(x, &ix); - svm_image_texture_frac(y, &iy); - svm_image_texture_frac(z, &iz); - - return svm_image_texture_read_3d(id, ix, iy, iz); - } - if (interpolation == INTERPOLATION_LINEAR) { - /* Trilinear interpolation. */ - int ix, iy, iz; - float tx = svm_image_texture_frac(x - 0.5f, &ix); - float ty = svm_image_texture_frac(y - 0.5f, &iy); - float tz = svm_image_texture_frac(z - 0.5f, &iz); - - float4 r; - r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(id, ix, iy, iz); - r += (1.0f - tz) * (1.0f - ty) * tx * svm_image_texture_read_3d(id, ix + 1, iy, iz); - r += (1.0f - tz) * ty * (1.0f - tx) * svm_image_texture_read_3d(id, ix, iy + 1, iz); - r += (1.0f - tz) * ty * tx * svm_image_texture_read_3d(id, ix + 1, iy + 1, iz); - - r += tz * (1.0f - ty) * (1.0f - tx) * svm_image_texture_read_3d(id, ix, iy, iz + 1); - r += tz * (1.0f - ty) * tx * svm_image_texture_read_3d(id, ix + 1, iy, iz + 1); - r += tz * ty * (1.0f - tx) * svm_image_texture_read_3d(id, ix, iy + 1, iz + 1); - r += tz * ty * tx * svm_image_texture_read_3d(id, ix + 1, iy + 1, iz + 1); - return r; - } - /* Tri-cubic interpolation. */ - int ix, iy, iz; - float tx = svm_image_texture_frac(x - 0.5f, &ix); - float ty = svm_image_texture_frac(y - 0.5f, &iy); - float tz = svm_image_texture_frac(z - 0.5f, &iz); - - float u[4], v[4], w[4]; - SET_CUBIC_SPLINE_WEIGHTS(u, tx); - SET_CUBIC_SPLINE_WEIGHTS(v, ty); - SET_CUBIC_SPLINE_WEIGHTS(w, tz); - - float4 r = make_float4(0.0f, 0.0f, 0.0f, 0.0f); - - for (int z = 0; z < 4; z++) { - for (int y = 0; y < 4; y++) { - for (int x = 0; x < 4; x++) { - float weight = u[x] * v[y] * w[z]; - r += weight * svm_image_texture_read_3d(id, ix + x - 1, iy + y - 1, iz + z - 1); - } - } - } - return r; -} - -#undef SET_CUBIC_SPLINE_WEIGHTS - -CCL_NAMESPACE_END