Refactor: Cycles: Remove now unused 3D image texture support
Pull Request: https://projects.blender.org/blender/blender/pulls/132908
This commit is contained in:
@@ -4,7 +4,6 @@
|
||||
|
||||
#ifdef WITH_CUDA
|
||||
|
||||
# include <climits>
|
||||
# include <cstdio>
|
||||
# include <cstdlib>
|
||||
# include <cstring>
|
||||
@@ -18,6 +17,7 @@
|
||||
# include "util/path.h"
|
||||
# include "util/string.h"
|
||||
# include "util/system.h"
|
||||
# include "util/texture.h"
|
||||
# include "util/time.h"
|
||||
# include "util/types.h"
|
||||
|
||||
@@ -747,24 +747,6 @@ static CUDA_MEMCPY2D tex_2d_copy_param(const device_texture &mem, const int pitc
|
||||
return param;
|
||||
}
|
||||
|
||||
static CUDA_MEMCPY3D tex_3d_copy_param(const device_texture &mem)
|
||||
{
|
||||
const size_t src_pitch = tex_src_pitch(mem);
|
||||
|
||||
CUDA_MEMCPY3D param;
|
||||
memset(¶m, 0, sizeof(param));
|
||||
param.dstMemoryType = CU_MEMORYTYPE_ARRAY;
|
||||
param.dstArray = (CUarray)mem.device_pointer;
|
||||
param.srcMemoryType = CU_MEMORYTYPE_HOST;
|
||||
param.srcHost = mem.host_pointer;
|
||||
param.srcPitch = src_pitch;
|
||||
param.WidthInBytes = param.srcPitch;
|
||||
param.Height = mem.data_height;
|
||||
param.Depth = mem.data_depth;
|
||||
|
||||
return param;
|
||||
}
|
||||
|
||||
void CUDADevice::tex_alloc(device_texture &mem)
|
||||
{
|
||||
CUDAContextScope scope(this);
|
||||
@@ -826,50 +808,11 @@ void CUDADevice::tex_alloc(device_texture &mem)
|
||||
}
|
||||
|
||||
Mem *cmem = nullptr;
|
||||
CUarray array_3d = nullptr;
|
||||
|
||||
if (!mem.is_resident(this)) {
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
|
||||
if (mem.data_depth > 1) {
|
||||
array_3d = (CUarray)mem.device_pointer;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
}
|
||||
}
|
||||
else if (mem.data_depth > 1) {
|
||||
/* 3D texture using array, there is no API for linear memory. */
|
||||
CUDA_ARRAY3D_DESCRIPTOR desc;
|
||||
|
||||
desc.Width = mem.data_width;
|
||||
desc.Height = mem.data_height;
|
||||
desc.Depth = mem.data_depth;
|
||||
desc.Format = format;
|
||||
desc.NumChannels = mem.data_elements;
|
||||
desc.Flags = 0;
|
||||
|
||||
LOG(WORK) << "Array 3D allocate: " << mem.name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
|
||||
cuda_assert(cuArray3DCreate(&array_3d, &desc));
|
||||
|
||||
if (!array_3d) {
|
||||
return;
|
||||
}
|
||||
|
||||
mem.device_pointer = (device_ptr)array_3d;
|
||||
mem.device_size = mem.memory_size();
|
||||
stats.mem_alloc(mem.memory_size());
|
||||
|
||||
const CUDA_MEMCPY3D param = tex_3d_copy_param(mem);
|
||||
cuda_assert(cuMemcpy3D(¶m));
|
||||
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
/* 2D texture, using pitch aligned linear memory. */
|
||||
@@ -901,12 +844,7 @@ void CUDADevice::tex_alloc(device_texture &mem)
|
||||
CUDA_RESOURCE_DESC resDesc;
|
||||
memset(&resDesc, 0, sizeof(resDesc));
|
||||
|
||||
if (array_3d) {
|
||||
resDesc.resType = CU_RESOURCE_TYPE_ARRAY;
|
||||
resDesc.res.array.hArray = array_3d;
|
||||
resDesc.flags = 0;
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
if (mem.data_height > 0) {
|
||||
const size_t dst_pitch = align_up(tex_src_pitch(mem), pitch_alignment);
|
||||
|
||||
resDesc.resType = CU_RESOURCE_TYPE_PITCH2D;
|
||||
@@ -978,12 +916,7 @@ void CUDADevice::tex_copy_to(device_texture &mem)
|
||||
}
|
||||
else {
|
||||
/* Resident and fully allocated, only copy. */
|
||||
if (mem.data_depth > 0) {
|
||||
CUDAContextScope scope(this);
|
||||
const CUDA_MEMCPY3D param = tex_3d_copy_param(mem);
|
||||
cuda_assert(cuMemcpy3D(¶m));
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
if (mem.data_height > 0) {
|
||||
CUDAContextScope scope(this);
|
||||
const CUDA_MEMCPY2D param = tex_2d_copy_param(mem, pitch_alignment);
|
||||
cuda_assert(cuMemcpy2DUnaligned(¶m));
|
||||
|
||||
@@ -719,23 +719,6 @@ static hip_Memcpy2D tex_2d_copy_param(const device_texture &mem, const int pitch
|
||||
return param;
|
||||
}
|
||||
|
||||
static HIP_MEMCPY3D tex_3d_copy_param(const device_texture &mem)
|
||||
{
|
||||
const size_t src_pitch = tex_src_pitch(mem);
|
||||
|
||||
HIP_MEMCPY3D param;
|
||||
memset(¶m, 0, sizeof(HIP_MEMCPY3D));
|
||||
param.dstMemoryType = hipMemoryTypeArray;
|
||||
param.dstArray = (hArray)mem.device_pointer;
|
||||
param.srcMemoryType = hipMemoryTypeHost;
|
||||
param.srcHost = mem.host_pointer;
|
||||
param.srcPitch = src_pitch;
|
||||
param.WidthInBytes = param.srcPitch;
|
||||
param.Height = mem.data_height;
|
||||
param.Depth = mem.data_depth;
|
||||
return param;
|
||||
}
|
||||
|
||||
void HIPDevice::tex_alloc(device_texture &mem)
|
||||
{
|
||||
HIPContextScope scope(this);
|
||||
@@ -794,50 +777,11 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
}
|
||||
|
||||
Mem *cmem = nullptr;
|
||||
hArray array_3d = nullptr;
|
||||
|
||||
if (!mem.is_resident(this)) {
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
|
||||
if (mem.data_depth > 1) {
|
||||
array_3d = (hArray)mem.device_pointer;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
}
|
||||
}
|
||||
else if (mem.data_depth > 1) {
|
||||
/* 3D texture using array, there is no API for linear memory. */
|
||||
HIP_ARRAY3D_DESCRIPTOR desc;
|
||||
|
||||
desc.Width = mem.data_width;
|
||||
desc.Height = mem.data_height;
|
||||
desc.Depth = mem.data_depth;
|
||||
desc.Format = format;
|
||||
desc.NumChannels = mem.data_elements;
|
||||
desc.Flags = 0;
|
||||
|
||||
LOG(WORK) << "Array 3D allocate: " << mem.name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
|
||||
hip_assert(hipArray3DCreate((hArray *)&array_3d, &desc));
|
||||
|
||||
if (!array_3d) {
|
||||
return;
|
||||
}
|
||||
|
||||
mem.device_pointer = (device_ptr)array_3d;
|
||||
mem.device_size = mem.memory_size();
|
||||
stats.mem_alloc(mem.memory_size());
|
||||
|
||||
const HIP_MEMCPY3D param = tex_3d_copy_param(mem);
|
||||
hip_assert(hipDrvMemcpy3D(¶m));
|
||||
|
||||
thread_scoped_lock lock(device_mem_map_mutex);
|
||||
cmem = &device_mem_map[&mem];
|
||||
cmem->texobject = 0;
|
||||
cmem->array = reinterpret_cast<arrayMemObject>(array_3d);
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
/* 2D texture, using pitch aligned linear memory. */
|
||||
@@ -870,12 +814,7 @@ void HIPDevice::tex_alloc(device_texture &mem)
|
||||
hipResourceDesc resDesc;
|
||||
memset(&resDesc, 0, sizeof(resDesc));
|
||||
|
||||
if (array_3d) {
|
||||
resDesc.resType = hipResourceTypeArray;
|
||||
resDesc.res.array.h_Array = array_3d;
|
||||
resDesc.flags = 0;
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
if (mem.data_height > 0) {
|
||||
const size_t dst_pitch = align_up(tex_src_pitch(mem), pitch_alignment);
|
||||
|
||||
resDesc.resType = hipResourceTypePitch2D;
|
||||
@@ -949,12 +888,7 @@ void HIPDevice::tex_copy_to(device_texture &mem)
|
||||
}
|
||||
else {
|
||||
/* Resident and fully allocated, only copy. */
|
||||
if (mem.data_depth > 0) {
|
||||
HIPContextScope scope(this);
|
||||
const HIP_MEMCPY3D param = tex_3d_copy_param(mem);
|
||||
hip_assert(hipDrvMemcpy3D(¶m));
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
if (mem.data_height > 0) {
|
||||
HIPContextScope scope(this);
|
||||
const hip_Memcpy2D param = tex_2d_copy_param(mem, pitch_alignment);
|
||||
hip_assert(hipDrvMemcpy2DUnaligned(¶m));
|
||||
|
||||
@@ -16,7 +16,6 @@ device_memory::device_memory(Device *device, const char *_name, MemoryType type)
|
||||
device_size(0),
|
||||
data_width(0),
|
||||
data_height(0),
|
||||
data_depth(0),
|
||||
type(type),
|
||||
name_storage(_name),
|
||||
device(device),
|
||||
@@ -70,7 +69,6 @@ void device_memory::host_and_device_free()
|
||||
data_size = 0;
|
||||
data_width = 0;
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
}
|
||||
|
||||
void device_memory::device_alloc()
|
||||
@@ -218,9 +216,9 @@ device_texture::~device_texture()
|
||||
}
|
||||
|
||||
/* Host memory allocation. */
|
||||
void *device_texture::alloc(const size_t width, const size_t height, const size_t depth)
|
||||
void *device_texture::alloc(const size_t width, const size_t height)
|
||||
{
|
||||
const size_t new_size = size(width, height, depth);
|
||||
const size_t new_size = size(width, height);
|
||||
|
||||
if (new_size != data_size) {
|
||||
host_and_device_free();
|
||||
@@ -231,11 +229,9 @@ void *device_texture::alloc(const size_t width, const size_t height, const size_
|
||||
data_size = new_size;
|
||||
data_width = width;
|
||||
data_height = height;
|
||||
data_depth = depth;
|
||||
|
||||
info.width = width;
|
||||
info.height = height;
|
||||
info.depth = depth;
|
||||
|
||||
return host_pointer;
|
||||
}
|
||||
|
||||
@@ -235,7 +235,6 @@ class device_memory {
|
||||
size_t device_size;
|
||||
size_t data_width;
|
||||
size_t data_height;
|
||||
size_t data_depth;
|
||||
MemoryType type;
|
||||
const char *name;
|
||||
string name_storage;
|
||||
@@ -386,9 +385,9 @@ template<typename T> class device_vector : public device_memory {
|
||||
}
|
||||
|
||||
/* Host memory allocation. */
|
||||
T *alloc(const size_t width, const size_t height = 0, const size_t depth = 0)
|
||||
T *alloc(const size_t width, const size_t height = 0)
|
||||
{
|
||||
size_t new_size = size(width, height, depth);
|
||||
size_t new_size = size(width, height);
|
||||
|
||||
if (new_size != data_size) {
|
||||
host_and_device_free();
|
||||
@@ -400,7 +399,6 @@ template<typename T> class device_vector : public device_memory {
|
||||
data_size = new_size;
|
||||
data_width = width;
|
||||
data_height = height;
|
||||
data_depth = depth;
|
||||
|
||||
return data();
|
||||
}
|
||||
@@ -408,9 +406,9 @@ template<typename T> class device_vector : public device_memory {
|
||||
/* Host memory resize. Only use this if the original data needs to be
|
||||
* preserved or memory needs to be initialized, it is faster to call
|
||||
* alloc() if it can be discarded. */
|
||||
T *resize(const size_t width, const size_t height = 0, const size_t depth = 0)
|
||||
T *resize(const size_t width, const size_t height = 0)
|
||||
{
|
||||
size_t new_size = size(width, height, depth);
|
||||
size_t new_size = size(width, height);
|
||||
|
||||
if (new_size != data_size) {
|
||||
void *new_ptr = host_alloc(sizeof(T) * new_size);
|
||||
@@ -433,7 +431,6 @@ template<typename T> class device_vector : public device_memory {
|
||||
data_size = new_size;
|
||||
data_width = width;
|
||||
data_height = height;
|
||||
data_depth = depth;
|
||||
|
||||
return data();
|
||||
}
|
||||
@@ -446,7 +443,6 @@ template<typename T> class device_vector : public device_memory {
|
||||
data_size = from.size();
|
||||
data_width = 0;
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
host_pointer = from.steal_pointer();
|
||||
assert(device_pointer == 0);
|
||||
}
|
||||
@@ -459,7 +455,6 @@ template<typename T> class device_vector : public device_memory {
|
||||
data_size = 0;
|
||||
data_width = 0;
|
||||
data_height = 0;
|
||||
data_depth = 0;
|
||||
host_pointer = 0;
|
||||
modified = true;
|
||||
need_realloc_ = true;
|
||||
@@ -553,9 +548,9 @@ template<typename T> class device_vector : public device_memory {
|
||||
}
|
||||
|
||||
protected:
|
||||
size_t size(const size_t width, const size_t height, const size_t depth)
|
||||
size_t size(const size_t width, const size_t height)
|
||||
{
|
||||
return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth);
|
||||
return width * ((height == 0) ? 1 : height);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -600,16 +595,16 @@ class device_texture : public device_memory {
|
||||
ExtensionType extension);
|
||||
~device_texture() override;
|
||||
|
||||
void *alloc(const size_t width, const size_t height, const size_t depth = 0);
|
||||
void *alloc(const size_t width, const size_t height);
|
||||
void copy_to_device();
|
||||
|
||||
uint slot = 0;
|
||||
TextureInfo info;
|
||||
|
||||
protected:
|
||||
size_t size(const size_t width, const size_t height, const size_t depth)
|
||||
size_t size(const size_t width, const size_t height)
|
||||
{
|
||||
return width * ((height == 0) ? 1 : height) * ((depth == 0) ? 1 : depth);
|
||||
return width * ((height == 0) ? 1 : height);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -530,7 +530,7 @@ void MetalDevice::compile_and_load(const int device_id, MetalPipelineType pso_ty
|
||||
|
||||
bool MetalDevice::is_texture(const TextureInfo &tex)
|
||||
{
|
||||
return (tex.depth > 0 || tex.height > 0);
|
||||
return tex.height > 0;
|
||||
}
|
||||
|
||||
void MetalDevice::load_texture_info() {}
|
||||
@@ -1022,43 +1022,7 @@ void MetalDevice::tex_alloc(device_texture &mem)
|
||||
id<MTLTexture> mtlTexture = nil;
|
||||
size_t src_pitch = mem.data_width * datatype_size(mem.data_type) * mem.data_elements;
|
||||
|
||||
if (mem.data_depth > 1) {
|
||||
/* 3D texture using array */
|
||||
MTLTextureDescriptor *desc;
|
||||
|
||||
desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:format
|
||||
width:mem.data_width
|
||||
height:mem.data_height
|
||||
mipmapped:NO];
|
||||
|
||||
desc.storageMode = MTLStorageModeShared;
|
||||
desc.usage = MTLTextureUsageShaderRead;
|
||||
|
||||
desc.textureType = MTLTextureType3D;
|
||||
desc.depth = mem.data_depth;
|
||||
|
||||
LOG(WORK) << "Texture 3D allocate: " << mem.name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
<< string_human_readable_size(mem.memory_size()) << ")";
|
||||
|
||||
mtlTexture = [mtlDevice newTextureWithDescriptor:desc];
|
||||
if (!mtlTexture) {
|
||||
set_error("System is out of GPU memory");
|
||||
return;
|
||||
}
|
||||
|
||||
const size_t imageBytes = src_pitch * mem.data_height;
|
||||
for (size_t d = 0; d < mem.data_depth; d++) {
|
||||
const size_t offset = d * imageBytes;
|
||||
[mtlTexture replaceRegion:MTLRegionMake3D(0, 0, d, mem.data_width, mem.data_height, 1)
|
||||
mipmapLevel:0
|
||||
slice:0
|
||||
withBytes:(uint8_t *)mem.host_pointer + offset
|
||||
bytesPerRow:src_pitch
|
||||
bytesPerImage:0];
|
||||
}
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
if (mem.data_height > 0) {
|
||||
/* 2D texture */
|
||||
MTLTextureDescriptor *desc;
|
||||
|
||||
@@ -1145,24 +1109,7 @@ void MetalDevice::tex_copy_to(device_texture &mem)
|
||||
if (mem.is_resident(this)) {
|
||||
const size_t src_pitch = mem.data_width * datatype_size(mem.data_type) * mem.data_elements;
|
||||
|
||||
if (mem.data_depth > 0) {
|
||||
id<MTLTexture> mtlTexture;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
||||
mtlTexture = metal_mem_map.at(&mem)->mtlTexture;
|
||||
}
|
||||
const size_t imageBytes = src_pitch * mem.data_height;
|
||||
for (size_t d = 0; d < mem.data_depth; d++) {
|
||||
const size_t offset = d * imageBytes;
|
||||
[mtlTexture replaceRegion:MTLRegionMake3D(0, 0, d, mem.data_width, mem.data_height, 1)
|
||||
mipmapLevel:0
|
||||
slice:0
|
||||
withBytes:(uint8_t *)mem.host_pointer + offset
|
||||
bytesPerRow:src_pitch
|
||||
bytesPerImage:0];
|
||||
}
|
||||
}
|
||||
else if (mem.data_height > 0) {
|
||||
if (mem.data_height > 0) {
|
||||
id<MTLTexture> mtlTexture;
|
||||
{
|
||||
std::lock_guard<std::recursive_mutex> lock(metal_mem_map_mutex);
|
||||
@@ -1182,7 +1129,7 @@ void MetalDevice::tex_copy_to(device_texture &mem)
|
||||
void MetalDevice::tex_free(device_texture &mem)
|
||||
{
|
||||
int slot = mem.slot;
|
||||
if (mem.data_depth == 0 && mem.data_height == 0) {
|
||||
if (mem.data_height == 0) {
|
||||
generic_free(mem);
|
||||
}
|
||||
else if (metal_mem_map.count(&mem)) {
|
||||
|
||||
@@ -688,7 +688,6 @@ static sycl::ext::oneapi::experimental::image_descriptor image_desc(const device
|
||||
sycl::ext::oneapi::experimental::image_descriptor param;
|
||||
param.width = mem.data_width;
|
||||
param.height = mem.data_height;
|
||||
param.depth = mem.data_depth == 1 ? 0 : mem.data_depth;
|
||||
param.num_channels = mem.data_elements;
|
||||
param.channel_type = channel_type;
|
||||
|
||||
@@ -760,44 +759,22 @@ void OneapiDevice::tex_alloc(device_texture &mem)
|
||||
|
||||
if (mem.data_height > 0) {
|
||||
const sycl::device &device = reinterpret_cast<sycl::queue *>(queue)->get_device();
|
||||
if (mem.data_depth > 1) {
|
||||
const size_t max_width = device.get_info<sycl::info::device::image3d_max_width>();
|
||||
const size_t max_height = device.get_info<sycl::info::device::image3d_max_height>();
|
||||
const size_t max_depth = device.get_info<sycl::info::device::image3d_max_depth>();
|
||||
const size_t max_width = device.get_info<sycl::info::device::image2d_max_width>();
|
||||
const size_t max_height = device.get_info<sycl::info::device::image2d_max_height>();
|
||||
|
||||
if (mem.data_width > max_width || mem.data_height > max_height ||
|
||||
mem.data_depth > max_depth)
|
||||
{
|
||||
set_error(string_printf(
|
||||
"Maximum GPU 3D texture size exceeded (max %zux%zux%zu, found %zux%zux%zu)",
|
||||
max_width,
|
||||
max_height,
|
||||
max_depth,
|
||||
mem.data_width,
|
||||
mem.data_height,
|
||||
mem.data_depth));
|
||||
return;
|
||||
}
|
||||
}
|
||||
else {
|
||||
const size_t max_width = device.get_info<sycl::info::device::image2d_max_width>();
|
||||
const size_t max_height = device.get_info<sycl::info::device::image2d_max_height>();
|
||||
|
||||
if (mem.data_width > max_width || mem.data_height > max_height) {
|
||||
set_error(
|
||||
string_printf("Maximum GPU 2D texture size exceeded (max %zux%zu, found %zux%zu)",
|
||||
max_width,
|
||||
max_height,
|
||||
mem.data_width,
|
||||
mem.data_height));
|
||||
return;
|
||||
}
|
||||
if (mem.data_width > max_width || mem.data_height > max_height) {
|
||||
set_error(
|
||||
string_printf("Maximum GPU 2D texture size exceeded (max %zux%zu, found %zux%zu)",
|
||||
max_width,
|
||||
max_height,
|
||||
mem.data_width,
|
||||
mem.data_height));
|
||||
return;
|
||||
}
|
||||
|
||||
/* 2D/3D texture -- Tile optimized */
|
||||
size_t depth = mem.data_depth == 1 ? 0 : mem.data_depth;
|
||||
/* 2D texture -- Tile optimized */
|
||||
desc = sycl::ext::oneapi::experimental::image_descriptor(
|
||||
{mem.data_width, mem.data_height, depth}, mem.data_elements, channel_type);
|
||||
{mem.data_width, mem.data_height, 0}, mem.data_elements, channel_type);
|
||||
|
||||
LOG(WORK) << "Array 2D/3D allocate: " << mem.name << ", "
|
||||
<< string_human_readable_number(mem.memory_size()) << " bytes. ("
|
||||
|
||||
@@ -108,102 +108,6 @@ template<typename TexT, typename OutT = float4> struct TextureInterpolator {
|
||||
return read(data[y * width + x]);
|
||||
}
|
||||
|
||||
/* Read 3D Texture Data
|
||||
* Does not check if data request is in bounds. */
|
||||
static ccl_always_inline OutT read(const TexT *data,
|
||||
const int x,
|
||||
int y,
|
||||
const int z,
|
||||
int width,
|
||||
const int height,
|
||||
const int /*depth*/)
|
||||
{
|
||||
return read(data[x + y * width + z * width * height]);
|
||||
}
|
||||
|
||||
/* Read 3D Texture Data Clip
|
||||
* Returns transparent black if data request is out of bounds. */
|
||||
static ccl_always_inline OutT read_clip(const TexT *data,
|
||||
const int x,
|
||||
int y,
|
||||
const int z,
|
||||
int width,
|
||||
const int height,
|
||||
const int depth)
|
||||
{
|
||||
if (x < 0 || x >= width || y < 0 || y >= height || z < 0 || z >= depth) {
|
||||
return zero();
|
||||
}
|
||||
return read(data[x + y * width + z * width * height]);
|
||||
}
|
||||
|
||||
/* Trilinear Interpolation */
|
||||
static ccl_always_inline OutT
|
||||
trilinear_lookup(const TexT *data,
|
||||
const float tx,
|
||||
const float ty,
|
||||
const float tz,
|
||||
const int ix,
|
||||
const int iy,
|
||||
const int iz,
|
||||
const int nix,
|
||||
const int niy,
|
||||
const int niz,
|
||||
const int width,
|
||||
const int height,
|
||||
const int depth,
|
||||
OutT read(const TexT *, int, int, int, int, int, int))
|
||||
{
|
||||
OutT r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
|
||||
read(data, ix, iy, iz, width, height, depth);
|
||||
r += (1.0f - tz) * (1.0f - ty) * tx * read(data, nix, iy, iz, width, height, depth);
|
||||
r += (1.0f - tz) * ty * (1.0f - tx) * read(data, ix, niy, iz, width, height, depth);
|
||||
r += (1.0f - tz) * ty * tx * read(data, nix, niy, iz, width, height, depth);
|
||||
|
||||
r += tz * (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, niz, width, height, depth);
|
||||
r += tz * (1.0f - ty) * tx * read(data, nix, iy, niz, width, height, depth);
|
||||
r += tz * ty * (1.0f - tx) * read(data, ix, niy, niz, width, height, depth);
|
||||
r += tz * ty * tx * read(data, nix, niy, niz, width, height, depth);
|
||||
return r;
|
||||
}
|
||||
|
||||
/** Tricubic Interpolation */
|
||||
static ccl_always_inline OutT
|
||||
tricubic_lookup(const TexT *data,
|
||||
const float tx,
|
||||
const float ty,
|
||||
const float tz,
|
||||
const int xc[4],
|
||||
const int yc[4],
|
||||
const int zc[4],
|
||||
const int width,
|
||||
const int height,
|
||||
const int depth,
|
||||
OutT read(const TexT *, int, int, int, int, int, int))
|
||||
{
|
||||
float u[4], v[4], w[4];
|
||||
|
||||
/* Some helper macros to keep code size reasonable.
|
||||
* Lets the compiler inline all the matrix multiplications.
|
||||
*/
|
||||
#define DATA(x, y, z) (read(data, xc[x], yc[y], zc[z], width, height, depth))
|
||||
#define COL_TERM(col, row) \
|
||||
(v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
|
||||
u[3] * DATA(3, col, row)))
|
||||
#define ROW_TERM(row) \
|
||||
(w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
|
||||
|
||||
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
|
||||
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
|
||||
SET_CUBIC_SPLINE_WEIGHTS(w, tz);
|
||||
/* Actual interpolation. */
|
||||
return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
|
||||
|
||||
#undef COL_TERM
|
||||
#undef ROW_TERM
|
||||
#undef DATA
|
||||
}
|
||||
|
||||
static ccl_always_inline int wrap_periodic(int x, const int width)
|
||||
{
|
||||
x %= width;
|
||||
@@ -418,286 +322,6 @@ template<typename TexT, typename OutT = float4> struct TextureInterpolator {
|
||||
return interp_cubic(info, x, y);
|
||||
}
|
||||
}
|
||||
|
||||
/* ******** 3D interpolation ******** */
|
||||
|
||||
static ccl_always_inline OutT interp_3d_closest(const TextureInfo &info,
|
||||
const float x,
|
||||
const float y,
|
||||
const float z)
|
||||
{
|
||||
const int width = info.width;
|
||||
const int height = info.height;
|
||||
const int depth = info.depth;
|
||||
int ix, iy, iz;
|
||||
|
||||
frac(x * (float)width, &ix);
|
||||
frac(y * (float)height, &iy);
|
||||
frac(z * (float)depth, &iz);
|
||||
|
||||
switch (info.extension) {
|
||||
case EXTENSION_REPEAT:
|
||||
ix = wrap_periodic(ix, width);
|
||||
iy = wrap_periodic(iy, height);
|
||||
iz = wrap_periodic(iz, depth);
|
||||
break;
|
||||
case EXTENSION_CLIP:
|
||||
/* No samples are inside the clip region. */
|
||||
if (ix < 0 || ix >= width || iy < 0 || iy >= height || iz < 0 || iz >= depth) {
|
||||
return zero();
|
||||
}
|
||||
break;
|
||||
case EXTENSION_EXTEND:
|
||||
ix = wrap_clamp(ix, width);
|
||||
iy = wrap_clamp(iy, height);
|
||||
iz = wrap_clamp(iz, depth);
|
||||
break;
|
||||
case EXTENSION_MIRROR:
|
||||
ix = wrap_mirror(ix, width);
|
||||
iy = wrap_mirror(iy, height);
|
||||
iz = wrap_mirror(iz, depth);
|
||||
break;
|
||||
default:
|
||||
kernel_assert(0);
|
||||
return zero();
|
||||
}
|
||||
|
||||
const TexT *data = (const TexT *)info.data;
|
||||
return read(data, ix, iy, iz, width, height, depth);
|
||||
}
|
||||
|
||||
static ccl_always_inline OutT interp_3d_linear(const TextureInfo &info,
|
||||
const float x,
|
||||
const float y,
|
||||
const float z)
|
||||
{
|
||||
const int width = info.width;
|
||||
const int height = info.height;
|
||||
const int depth = info.depth;
|
||||
int ix, iy, iz;
|
||||
int nix, niy, niz;
|
||||
|
||||
/* A -0.5 offset is used to center the linear samples around the sample point. */
|
||||
float tx = frac(x * (float)width - 0.5f, &ix);
|
||||
float ty = frac(y * (float)height - 0.5f, &iy);
|
||||
float tz = frac(z * (float)depth - 0.5f, &iz);
|
||||
|
||||
switch (info.extension) {
|
||||
case EXTENSION_REPEAT:
|
||||
ix = wrap_periodic(ix, width);
|
||||
nix = wrap_periodic(ix + 1, width);
|
||||
|
||||
iy = wrap_periodic(iy, height);
|
||||
niy = wrap_periodic(iy + 1, height);
|
||||
|
||||
iz = wrap_periodic(iz, depth);
|
||||
niz = wrap_periodic(iz + 1, depth);
|
||||
break;
|
||||
case EXTENSION_CLIP:
|
||||
/* No linear samples are inside the clip region. */
|
||||
if (ix < -1 || ix >= width || iy < -1 || iy >= height || iz < -1 || iz >= depth) {
|
||||
return zero();
|
||||
}
|
||||
|
||||
nix = ix + 1;
|
||||
niy = iy + 1;
|
||||
niz = iz + 1;
|
||||
|
||||
/* All linear samples are inside the clip region. */
|
||||
if (ix >= 0 && nix < width && iy >= 0 && niy < height && iz >= 0 && niz < depth) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* The linear samples span the clip border.
|
||||
* #read_clip is used to ensure proper interpolation across the clip border. */
|
||||
return trilinear_lookup((const TexT *)info.data,
|
||||
tx,
|
||||
ty,
|
||||
tz,
|
||||
ix,
|
||||
iy,
|
||||
iz,
|
||||
nix,
|
||||
niy,
|
||||
niz,
|
||||
width,
|
||||
height,
|
||||
depth,
|
||||
read_clip);
|
||||
case EXTENSION_EXTEND:
|
||||
nix = wrap_clamp(ix + 1, width);
|
||||
ix = wrap_clamp(ix, width);
|
||||
|
||||
niy = wrap_clamp(iy + 1, height);
|
||||
iy = wrap_clamp(iy, height);
|
||||
|
||||
niz = wrap_clamp(iz + 1, depth);
|
||||
iz = wrap_clamp(iz, depth);
|
||||
break;
|
||||
case EXTENSION_MIRROR:
|
||||
nix = wrap_mirror(ix + 1, width);
|
||||
ix = wrap_mirror(ix, width);
|
||||
|
||||
niy = wrap_mirror(iy + 1, height);
|
||||
iy = wrap_mirror(iy, height);
|
||||
|
||||
niz = wrap_mirror(iz + 1, depth);
|
||||
iz = wrap_mirror(iz, depth);
|
||||
break;
|
||||
default:
|
||||
kernel_assert(0);
|
||||
return zero();
|
||||
}
|
||||
|
||||
return trilinear_lookup((const TexT *)info.data,
|
||||
tx,
|
||||
ty,
|
||||
tz,
|
||||
ix,
|
||||
iy,
|
||||
iz,
|
||||
nix,
|
||||
niy,
|
||||
niz,
|
||||
width,
|
||||
height,
|
||||
depth,
|
||||
read);
|
||||
}
|
||||
|
||||
/* Tricubic b-spline interpolation.
|
||||
*
|
||||
* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
|
||||
* causing stack overflow issue in this function unless it is inlined.
|
||||
*
|
||||
* Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
|
||||
* enabled.
|
||||
*/
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
static ccl_always_inline
|
||||
#else
|
||||
static ccl_never_inline
|
||||
#endif
|
||||
OutT
|
||||
interp_3d_cubic(const TextureInfo &info, const float x, float y, const float z)
|
||||
{
|
||||
int width = info.width;
|
||||
int height = info.height;
|
||||
int depth = info.depth;
|
||||
int ix, iy, iz;
|
||||
|
||||
/* A -0.5 offset is used to center the cubic samples around the sample point. */
|
||||
const float tx = frac(x * (float)width - 0.5f, &ix);
|
||||
const float ty = frac(y * (float)height - 0.5f, &iy);
|
||||
const float tz = frac(z * (float)depth - 0.5f, &iz);
|
||||
|
||||
int pix, piy, piz;
|
||||
int nix, niy, niz;
|
||||
int nnix, nniy, nniz;
|
||||
|
||||
switch (info.extension) {
|
||||
case EXTENSION_REPEAT:
|
||||
ix = wrap_periodic(ix, width);
|
||||
pix = wrap_periodic(ix - 1, width);
|
||||
nix = wrap_periodic(ix + 1, width);
|
||||
nnix = wrap_periodic(ix + 2, width);
|
||||
|
||||
iy = wrap_periodic(iy, height);
|
||||
niy = wrap_periodic(iy + 1, height);
|
||||
piy = wrap_periodic(iy - 1, height);
|
||||
nniy = wrap_periodic(iy + 2, height);
|
||||
|
||||
iz = wrap_periodic(iz, depth);
|
||||
piz = wrap_periodic(iz - 1, depth);
|
||||
niz = wrap_periodic(iz + 1, depth);
|
||||
nniz = wrap_periodic(iz + 2, depth);
|
||||
break;
|
||||
case EXTENSION_CLIP: {
|
||||
/* No cubic samples are inside the clip region. */
|
||||
if (ix < -2 || ix > width || iy < -2 || iy > height || iz < -2 || iz > depth) {
|
||||
return zero();
|
||||
}
|
||||
|
||||
pix = ix - 1;
|
||||
nnix = ix + 2;
|
||||
nix = ix + 1;
|
||||
|
||||
piy = iy - 1;
|
||||
niy = iy + 1;
|
||||
nniy = iy + 2;
|
||||
|
||||
piz = iz - 1;
|
||||
niz = iz + 1;
|
||||
nniz = iz + 2;
|
||||
|
||||
/* All cubic samples are inside the clip region. */
|
||||
if (pix >= 0 && nnix < width && piy >= 0 && nniy < height && piz >= 0 && nniz < depth) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* The Cubic samples span the clip border.
|
||||
* read_clip is used to ensure proper interpolation across the clip border. */
|
||||
const int xc[4] = {pix, ix, nix, nnix};
|
||||
const int yc[4] = {piy, iy, niy, nniy};
|
||||
const int zc[4] = {piz, iz, niz, nniz};
|
||||
return tricubic_lookup(
|
||||
(const TexT *)info.data, tx, ty, tz, xc, yc, zc, width, height, depth, read_clip);
|
||||
}
|
||||
case EXTENSION_EXTEND:
|
||||
pix = wrap_clamp(ix - 1, width);
|
||||
nix = wrap_clamp(ix + 1, width);
|
||||
nnix = wrap_clamp(ix + 2, width);
|
||||
ix = wrap_clamp(ix, width);
|
||||
|
||||
piy = wrap_clamp(iy - 1, height);
|
||||
niy = wrap_clamp(iy + 1, height);
|
||||
nniy = wrap_clamp(iy + 2, height);
|
||||
iy = wrap_clamp(iy, height);
|
||||
|
||||
piz = wrap_clamp(iz - 1, depth);
|
||||
niz = wrap_clamp(iz + 1, depth);
|
||||
nniz = wrap_clamp(iz + 2, depth);
|
||||
iz = wrap_clamp(iz, depth);
|
||||
break;
|
||||
case EXTENSION_MIRROR:
|
||||
pix = wrap_mirror(ix - 1, width);
|
||||
nix = wrap_mirror(ix + 1, width);
|
||||
nnix = wrap_mirror(ix + 2, width);
|
||||
ix = wrap_mirror(ix, width);
|
||||
|
||||
piy = wrap_mirror(iy - 1, height);
|
||||
niy = wrap_mirror(iy + 1, height);
|
||||
nniy = wrap_mirror(iy + 2, height);
|
||||
iy = wrap_mirror(iy, height);
|
||||
|
||||
piz = wrap_mirror(iz - 1, depth);
|
||||
niz = wrap_mirror(iz + 1, depth);
|
||||
nniz = wrap_mirror(iz + 2, depth);
|
||||
iz = wrap_mirror(iz, depth);
|
||||
break;
|
||||
default:
|
||||
kernel_assert(0);
|
||||
return zero();
|
||||
}
|
||||
const int xc[4] = {pix, ix, nix, nnix};
|
||||
const int yc[4] = {piy, iy, niy, nniy};
|
||||
const int zc[4] = {piz, iz, niz, nniz};
|
||||
const TexT *data = (const TexT *)info.data;
|
||||
return tricubic_lookup(data, tx, ty, tz, xc, yc, zc, width, height, depth, read);
|
||||
}
|
||||
|
||||
static ccl_always_inline OutT interp_3d(
|
||||
const TextureInfo &info, const float x, float y, const float z, InterpolationType interp)
|
||||
{
|
||||
switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
|
||||
case INTERPOLATION_CLOSEST:
|
||||
return interp_3d_closest(info, x, y, z);
|
||||
case INTERPOLATION_LINEAR:
|
||||
return interp_3d_linear(info, x, y, z);
|
||||
default:
|
||||
return interp_3d_cubic(info, x, y, z);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
#ifdef WITH_NANOVDB
|
||||
@@ -891,30 +515,6 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
|
||||
P = transform_point(&info.transform_3d, P);
|
||||
}
|
||||
switch (info.data_type) {
|
||||
case IMAGE_DATA_TYPE_HALF: {
|
||||
const float f = TextureInterpolator<half, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||
return make_float4(f, f, f, 1.0f);
|
||||
}
|
||||
case IMAGE_DATA_TYPE_BYTE: {
|
||||
const float f = TextureInterpolator<uchar, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||
return make_float4(f, f, f, 1.0f);
|
||||
}
|
||||
case IMAGE_DATA_TYPE_USHORT: {
|
||||
const float f = TextureInterpolator<uint16_t, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||
return make_float4(f, f, f, 1.0f);
|
||||
}
|
||||
case IMAGE_DATA_TYPE_FLOAT: {
|
||||
const float f = TextureInterpolator<float, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||
return make_float4(f, f, f, 1.0f);
|
||||
}
|
||||
case IMAGE_DATA_TYPE_HALF4:
|
||||
return TextureInterpolator<half4>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||
case IMAGE_DATA_TYPE_BYTE4:
|
||||
return TextureInterpolator<uchar4>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||
case IMAGE_DATA_TYPE_USHORT4:
|
||||
return TextureInterpolator<ushort4>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||
case IMAGE_DATA_TYPE_FLOAT4:
|
||||
return TextureInterpolator<float4>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||
#ifdef WITH_NANOVDB
|
||||
case IMAGE_DATA_TYPE_NANOVDB_FLOAT: {
|
||||
const float f = NanoVDBInterpolator<float, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
||||
|
||||
@@ -81,7 +81,6 @@ typedef unsigned long long uint64_t;
|
||||
|
||||
typedef unsigned long long CUtexObject;
|
||||
typedef CUtexObject ccl_gpu_tex_object_2D;
|
||||
typedef CUtexObject ccl_gpu_tex_object_3D;
|
||||
|
||||
template<typename T>
|
||||
ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D texobj,
|
||||
@@ -91,15 +90,6 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D
|
||||
return tex2D<T>(texobj, x, y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D texobj,
|
||||
const float x,
|
||||
const float y,
|
||||
const float z)
|
||||
{
|
||||
return tex3D<T>(texobj, x, y, z);
|
||||
}
|
||||
|
||||
/* Use fast math functions */
|
||||
|
||||
#define cosf(x) __cosf(((float)(x)))
|
||||
|
||||
@@ -87,49 +87,6 @@ ccl_device_noinline T kernel_tex_image_interp_bicubic(const ccl_global TextureIn
|
||||
g1x * ccl_gpu_tex_object_read_2D<T>(tex, x1, y1));
|
||||
}
|
||||
|
||||
/* Fast tricubic texture lookup using 8 trilinear lookups. */
|
||||
template<typename T>
|
||||
ccl_device_noinline T
|
||||
kernel_tex_image_interp_tricubic(const ccl_global TextureInfo &info, float x, float y, float z)
|
||||
{
|
||||
ccl_gpu_tex_object_3D tex = (ccl_gpu_tex_object_3D)info.data;
|
||||
|
||||
x = (x * info.width) - 0.5f;
|
||||
y = (y * info.height) - 0.5f;
|
||||
z = (z * info.depth) - 0.5f;
|
||||
|
||||
float px = floorf(x);
|
||||
float py = floorf(y);
|
||||
float pz = floorf(z);
|
||||
float fx = x - px;
|
||||
float fy = y - py;
|
||||
float fz = z - pz;
|
||||
|
||||
float g0x = cubic_g0(fx);
|
||||
float g1x = cubic_g1(fx);
|
||||
float g0y = cubic_g0(fy);
|
||||
float g1y = cubic_g1(fy);
|
||||
float g0z = cubic_g0(fz);
|
||||
float g1z = cubic_g1(fz);
|
||||
|
||||
/* Note +0.5 offset to compensate for CUDA linear filtering convention. */
|
||||
float x0 = (px + cubic_h0(fx) + 0.5f) / info.width;
|
||||
float x1 = (px + cubic_h1(fx) + 0.5f) / info.width;
|
||||
float y0 = (py + cubic_h0(fy) + 0.5f) / info.height;
|
||||
float y1 = (py + cubic_h1(fy) + 0.5f) / info.height;
|
||||
float z0 = (pz + cubic_h0(fz) + 0.5f) / info.depth;
|
||||
float z1 = (pz + cubic_h1(fz) + 0.5f) / info.depth;
|
||||
|
||||
return g0z * (g0y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y0, z0) +
|
||||
g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y0, z0)) +
|
||||
g1y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y1, z0) +
|
||||
g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y1, z0))) +
|
||||
g1z * (g0y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y0, z1) +
|
||||
g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y0, z1)) +
|
||||
g1y * (g0x * ccl_gpu_tex_object_read_3D<T>(tex, x0, y1, z1) +
|
||||
g1x * ccl_gpu_tex_object_read_3D<T>(tex, x1, y1, z1)));
|
||||
}
|
||||
|
||||
#ifdef WITH_NANOVDB
|
||||
template<typename OutT, typename Acc>
|
||||
ccl_device OutT kernel_tex_image_interp_trilinear_nanovdb(ccl_private Acc &acc,
|
||||
@@ -333,30 +290,9 @@ ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
|
||||
return make_float4(f, f, f, 1.0f);
|
||||
}
|
||||
#endif
|
||||
if (texture_type == IMAGE_DATA_TYPE_FLOAT4 || texture_type == IMAGE_DATA_TYPE_BYTE4 ||
|
||||
texture_type == IMAGE_DATA_TYPE_HALF4 || texture_type == IMAGE_DATA_TYPE_USHORT4)
|
||||
{
|
||||
if (interpolation == INTERPOLATION_CUBIC || interpolation == INTERPOLATION_SMART) {
|
||||
return kernel_tex_image_interp_tricubic<float4>(info, x, y, z);
|
||||
}
|
||||
else {
|
||||
ccl_gpu_tex_object_3D tex = (ccl_gpu_tex_object_3D)info.data;
|
||||
return ccl_gpu_tex_object_read_3D<float4>(tex, x, y, z);
|
||||
}
|
||||
}
|
||||
else {
|
||||
float f;
|
||||
|
||||
if (interpolation == INTERPOLATION_CUBIC || interpolation == INTERPOLATION_SMART) {
|
||||
f = kernel_tex_image_interp_tricubic<float>(info, x, y, z);
|
||||
}
|
||||
else {
|
||||
ccl_gpu_tex_object_3D tex = (ccl_gpu_tex_object_3D)info.data;
|
||||
f = ccl_gpu_tex_object_read_3D<float>(tex, x, y, z);
|
||||
}
|
||||
|
||||
return make_float4(f, f, f, 1.0f);
|
||||
}
|
||||
return make_float4(
|
||||
TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
|
||||
}
|
||||
|
||||
CCL_NAMESPACE_END
|
||||
|
||||
@@ -78,7 +78,6 @@ typedef unsigned long long uint64_t;
|
||||
|
||||
/* GPU texture objects */
|
||||
typedef hipTextureObject_t ccl_gpu_tex_object_2D;
|
||||
typedef hipTextureObject_t ccl_gpu_tex_object_3D;
|
||||
|
||||
template<typename T>
|
||||
ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D texobj,
|
||||
@@ -88,15 +87,6 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D
|
||||
return tex2D<T>(texobj, x, y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D texobj,
|
||||
const float x,
|
||||
const float y,
|
||||
const float z)
|
||||
{
|
||||
return tex3D<T>(texobj, x, y, z);
|
||||
}
|
||||
|
||||
/* Use fast math functions */
|
||||
|
||||
#define cosf(x) __cosf(((float)(x)))
|
||||
|
||||
@@ -348,17 +348,13 @@ typedef metal::raytracing::intersector<triangle_data, curve_data, extended_limit
|
||||
|
||||
/* texture bindings and sampler setup */
|
||||
|
||||
/* TextureParamsMetal is reinterpreted as either Texture2DParamsMetal or Texture3DParamsMetal
|
||||
* depending on context. */
|
||||
/* TextureParamsMetal is reinterpreted as Texture2DParamsMetal. */
|
||||
struct TextureParamsMetal {
|
||||
uint64_t tex;
|
||||
};
|
||||
struct Texture2DParamsMetal {
|
||||
texture2d<float, access::sample> tex;
|
||||
};
|
||||
struct Texture3DParamsMetal {
|
||||
texture3d<float, access::sample> tex;
|
||||
};
|
||||
|
||||
#ifdef __METALRT__
|
||||
struct MetalRTBlasWrapper {
|
||||
|
||||
@@ -25,7 +25,6 @@ class MetalKernelContext {
|
||||
|
||||
/* texture fetch adapter functions */
|
||||
using ccl_gpu_tex_object_2D = uint64_t;
|
||||
using ccl_gpu_tex_object_3D = uint64_t;
|
||||
|
||||
template<typename T>
|
||||
inline __attribute__((__always_inline__))
|
||||
@@ -33,12 +32,6 @@ class MetalKernelContext {
|
||||
kernel_assert(0);
|
||||
return 0;
|
||||
}
|
||||
template<typename T>
|
||||
inline __attribute__((__always_inline__))
|
||||
T ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, const float x, float y, const float z) const {
|
||||
kernel_assert(0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// texture2d
|
||||
template<>
|
||||
@@ -56,21 +49,6 @@ class MetalKernelContext {
|
||||
return ((ccl_global Texture2DParamsMetal*)metal_ancillaries->textures)[tid].tex.sample(metal_samplers[sid], float2(x, y)).x;
|
||||
}
|
||||
|
||||
// texture3d
|
||||
template<>
|
||||
inline __attribute__((__always_inline__))
|
||||
float4 ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, const float x, float y, const float z) const {
|
||||
const uint tid(tex);
|
||||
const uint sid(tex >> 32);
|
||||
return ((ccl_global Texture3DParamsMetal*)metal_ancillaries->textures)[tid].tex.sample(metal_samplers[sid], float3(x, y, z));
|
||||
}
|
||||
template<>
|
||||
inline __attribute__((__always_inline__))
|
||||
float ccl_gpu_tex_object_read_3D(ccl_gpu_tex_object_3D tex, const float x, float y, const float z) const {
|
||||
const uint tid(tex);
|
||||
const uint sid(tex >> 32);
|
||||
return ((ccl_global Texture3DParamsMetal*)metal_ancillaries->textures)[tid].tex.sample(metal_samplers[sid], float3(x, y, z)).x;
|
||||
}
|
||||
# include "kernel/device/gpu/image.h"
|
||||
|
||||
// clang-format on
|
||||
|
||||
@@ -63,7 +63,6 @@ typedef unsigned long long uint64_t;
|
||||
|
||||
typedef unsigned long long CUtexObject;
|
||||
typedef CUtexObject ccl_gpu_tex_object_2D;
|
||||
typedef CUtexObject ccl_gpu_tex_object_3D;
|
||||
|
||||
template<typename T>
|
||||
ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D texobj,
|
||||
@@ -73,15 +72,6 @@ ccl_device_forceinline T ccl_gpu_tex_object_read_2D(const ccl_gpu_tex_object_2D
|
||||
return tex2D<T>(texobj, x, y);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
ccl_device_forceinline T ccl_gpu_tex_object_read_3D(const ccl_gpu_tex_object_3D texobj,
|
||||
const float x,
|
||||
const float y,
|
||||
const float z)
|
||||
{
|
||||
return tex3D<T>(texobj, x, y, z);
|
||||
}
|
||||
|
||||
/* Half */
|
||||
|
||||
typedef unsigned short half;
|
||||
|
||||
@@ -228,7 +228,6 @@ ImageMetaData::ImageMetaData()
|
||||
: channels(0),
|
||||
width(0),
|
||||
height(0),
|
||||
depth(0),
|
||||
byte_size(0),
|
||||
type(IMAGE_DATA_NUM_TYPES),
|
||||
colorspace(u_colorspace_raw),
|
||||
@@ -241,7 +240,7 @@ ImageMetaData::ImageMetaData()
|
||||
bool ImageMetaData::operator==(const ImageMetaData &other) const
|
||||
{
|
||||
return channels == other.channels && width == other.width && height == other.height &&
|
||||
depth == other.depth && use_transform_3d == other.use_transform_3d &&
|
||||
use_transform_3d == other.use_transform_3d &&
|
||||
(!use_transform_3d || transform_3d == other.transform_3d) && type == other.type &&
|
||||
colorspace == other.colorspace && compress_as_srgb == other.compress_as_srgb;
|
||||
}
|
||||
@@ -543,13 +542,12 @@ bool ImageManager::file_load_image(Image *img, const int texture_limit)
|
||||
/* Get metadata. */
|
||||
const int width = img->metadata.width;
|
||||
const int height = img->metadata.height;
|
||||
const int depth = img->metadata.depth;
|
||||
const int components = img->metadata.channels;
|
||||
|
||||
/* Read pixels. */
|
||||
vector<StorageType> pixels_storage;
|
||||
StorageType *pixels;
|
||||
const size_t max_size = max(max(width, height), depth);
|
||||
const size_t max_size = max(width, height);
|
||||
if (max_size == 0) {
|
||||
/* Don't bother with empty images. */
|
||||
return false;
|
||||
@@ -557,12 +555,12 @@ bool ImageManager::file_load_image(Image *img, const int texture_limit)
|
||||
|
||||
/* Allocate memory as needed, may be smaller to resize down. */
|
||||
if (texture_limit > 0 && max_size > texture_limit) {
|
||||
pixels_storage.resize(((size_t)width) * height * depth * 4);
|
||||
pixels_storage.resize(((size_t)width) * height * 4);
|
||||
pixels = &pixels_storage[0];
|
||||
}
|
||||
else {
|
||||
const thread_scoped_lock device_lock(device_mutex);
|
||||
pixels = (StorageType *)img->mem->alloc(width, height, depth);
|
||||
pixels = (StorageType *)img->mem->alloc(width, height);
|
||||
}
|
||||
|
||||
if (pixels == nullptr) {
|
||||
@@ -570,7 +568,7 @@ bool ImageManager::file_load_image(Image *img, const int texture_limit)
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t num_pixels = ((size_t)width) * height * depth;
|
||||
const size_t num_pixels = ((size_t)width) * height;
|
||||
img->loader->load_pixels(
|
||||
img->metadata, pixels, num_pixels * components, image_associate_alpha(img));
|
||||
|
||||
@@ -667,23 +665,20 @@ bool ImageManager::file_load_image(Image *img, const int texture_limit)
|
||||
vector<StorageType> scaled_pixels;
|
||||
size_t scaled_width;
|
||||
size_t scaled_height;
|
||||
size_t scaled_depth;
|
||||
util_image_resize_pixels(pixels_storage,
|
||||
width,
|
||||
height,
|
||||
depth,
|
||||
is_rgba ? 4 : 1,
|
||||
scale_factor,
|
||||
&scaled_pixels,
|
||||
&scaled_width,
|
||||
&scaled_height,
|
||||
&scaled_depth);
|
||||
&scaled_height);
|
||||
|
||||
StorageType *texture_pixels;
|
||||
|
||||
{
|
||||
const thread_scoped_lock device_lock(device_mutex);
|
||||
texture_pixels = (StorageType *)img->mem->alloc(scaled_width, scaled_height, scaled_depth);
|
||||
texture_pixels = (StorageType *)img->mem->alloc(scaled_width, scaled_height);
|
||||
}
|
||||
|
||||
memcpy(texture_pixels, &scaled_pixels[0], scaled_pixels.size() * sizeof(StorageType));
|
||||
|
||||
@@ -55,7 +55,7 @@ class ImageMetaData {
|
||||
public:
|
||||
/* Set by ImageLoader.load_metadata(). */
|
||||
int channels;
|
||||
size_t width, height, depth;
|
||||
size_t width, height;
|
||||
size_t byte_size;
|
||||
ImageDataType type;
|
||||
|
||||
|
||||
@@ -41,7 +41,6 @@ bool OIIOImageLoader::load_metadata(const ImageDeviceFeatures & /*features*/,
|
||||
|
||||
metadata.width = spec.width;
|
||||
metadata.height = spec.height;
|
||||
metadata.depth = spec.depth;
|
||||
metadata.compress_as_srgb = false;
|
||||
|
||||
/* Check the main format, and channel formats. */
|
||||
@@ -98,7 +97,6 @@ static void oiio_load_pixels(const ImageMetaData &metadata,
|
||||
{
|
||||
const size_t width = metadata.width;
|
||||
const size_t height = metadata.height;
|
||||
const int depth = metadata.depth;
|
||||
const int components = metadata.channels;
|
||||
|
||||
/* Read pixels through OpenImageIO. */
|
||||
@@ -109,21 +107,16 @@ static void oiio_load_pixels(const ImageMetaData &metadata,
|
||||
readpixels = &tmppixels[0];
|
||||
}
|
||||
|
||||
if (depth <= 1) {
|
||||
const size_t scanlinesize = width * components * sizeof(StorageType);
|
||||
in->read_image(0,
|
||||
0,
|
||||
0,
|
||||
components,
|
||||
FileFormat,
|
||||
(uchar *)readpixels + (height - 1) * scanlinesize,
|
||||
AutoStride,
|
||||
-scanlinesize,
|
||||
AutoStride);
|
||||
}
|
||||
else {
|
||||
in->read_image(0, 0, 0, components, FileFormat, (uchar *)readpixels);
|
||||
}
|
||||
const size_t scanlinesize = width * components * sizeof(StorageType);
|
||||
in->read_image(0,
|
||||
0,
|
||||
0,
|
||||
components,
|
||||
FileFormat,
|
||||
(uchar *)readpixels + (height - 1) * scanlinesize,
|
||||
AutoStride,
|
||||
-scanlinesize,
|
||||
AutoStride);
|
||||
|
||||
if (components > 4) {
|
||||
const size_t dimensions = width * height;
|
||||
@@ -141,7 +134,7 @@ static void oiio_load_pixels(const ImageMetaData &metadata,
|
||||
if (cmyk) {
|
||||
const StorageType one = util_image_cast_from_float<StorageType>(1.0f);
|
||||
|
||||
const size_t num_pixels = width * height * depth;
|
||||
const size_t num_pixels = width * height;
|
||||
for (size_t i = num_pixels - 1, pixel = 0; pixel < num_pixels; pixel++, i--) {
|
||||
const float c = util_image_cast_to_float(pixels[i * 4 + 0]);
|
||||
const float m = util_image_cast_to_float(pixels[i * 4 + 1]);
|
||||
|
||||
@@ -30,7 +30,6 @@ bool SkyLoader::load_metadata(const ImageDeviceFeatures & /*features*/, ImageMet
|
||||
metadata.width = 512;
|
||||
metadata.height = 128;
|
||||
metadata.channels = 3;
|
||||
metadata.depth = 1;
|
||||
metadata.type = IMAGE_DATA_TYPE_FLOAT4;
|
||||
metadata.compress_as_srgb = false;
|
||||
return true;
|
||||
|
||||
@@ -22,12 +22,10 @@ template<typename T>
|
||||
void util_image_resize_pixels(const vector<T> &input_pixels,
|
||||
const size_t input_width,
|
||||
const size_t input_height,
|
||||
const size_t input_depth,
|
||||
const size_t components,
|
||||
vector<T> *output_pixels,
|
||||
size_t *output_width,
|
||||
size_t *output_height,
|
||||
size_t *output_depth);
|
||||
size_t *output_height);
|
||||
|
||||
/* Cast input pixel from unknown storage to float. */
|
||||
template<typename T> inline float util_image_cast_to_float(T value);
|
||||
|
||||
@@ -14,14 +14,12 @@ namespace {
|
||||
template<typename T>
|
||||
const T *util_image_read(const vector<T> &pixels,
|
||||
const size_t width,
|
||||
const size_t height,
|
||||
const size_t /*depth*/,
|
||||
const size_t /*height*/,
|
||||
const size_t components,
|
||||
const size_t x,
|
||||
const size_t y,
|
||||
const size_t z)
|
||||
const size_t y)
|
||||
{
|
||||
const size_t index = ((size_t)z * (width * height) + (size_t)y * width + (size_t)x) * components;
|
||||
const size_t index = ((size_t)y * width + (size_t)x) * components;
|
||||
return &pixels[index];
|
||||
}
|
||||
|
||||
@@ -29,36 +27,30 @@ template<typename T>
|
||||
void util_image_downscale_sample(const vector<T> &pixels,
|
||||
const size_t width,
|
||||
const size_t height,
|
||||
const size_t depth,
|
||||
const size_t components,
|
||||
const size_t kernel_size,
|
||||
const float x,
|
||||
const float y,
|
||||
const float z,
|
||||
T *result)
|
||||
{
|
||||
assert(components <= 4);
|
||||
const size_t ix = (size_t)x;
|
||||
const size_t iy = (size_t)y;
|
||||
const size_t iz = (size_t)z;
|
||||
/* TODO(sergey): Support something smarter than box filer. */
|
||||
float accum[4] = {0};
|
||||
size_t count = 0;
|
||||
for (size_t dz = 0; dz < kernel_size; ++dz) {
|
||||
for (size_t dy = 0; dy < kernel_size; ++dy) {
|
||||
for (size_t dx = 0; dx < kernel_size; ++dx) {
|
||||
const size_t nx = ix + dx;
|
||||
const size_t ny = iy + dy;
|
||||
const size_t nz = iz + dz;
|
||||
if (nx >= width || ny >= height || nz >= depth) {
|
||||
continue;
|
||||
}
|
||||
const T *pixel = util_image_read(pixels, width, height, depth, components, nx, ny, nz);
|
||||
for (size_t k = 0; k < components; ++k) {
|
||||
accum[k] += util_image_cast_to_float(pixel[k]);
|
||||
}
|
||||
++count;
|
||||
for (size_t dy = 0; dy < kernel_size; ++dy) {
|
||||
for (size_t dx = 0; dx < kernel_size; ++dx) {
|
||||
const size_t nx = ix + dx;
|
||||
const size_t ny = iy + dy;
|
||||
if (nx >= width || ny >= height) {
|
||||
continue;
|
||||
}
|
||||
const T *pixel = util_image_read(pixels, width, height, components, nx, ny);
|
||||
for (size_t k = 0; k < components; ++k) {
|
||||
accum[k] += util_image_cast_to_float(pixel[k]);
|
||||
}
|
||||
++count;
|
||||
}
|
||||
}
|
||||
if (count != 0) {
|
||||
@@ -78,34 +70,26 @@ template<typename T>
|
||||
void util_image_downscale_pixels(const vector<T> &input_pixels,
|
||||
const size_t input_width,
|
||||
const size_t input_height,
|
||||
const size_t input_depth,
|
||||
const size_t components,
|
||||
const float inv_scale_factor,
|
||||
const size_t output_width,
|
||||
const size_t output_height,
|
||||
const size_t output_depth,
|
||||
vector<T> *output_pixels)
|
||||
{
|
||||
const size_t kernel_size = (size_t)(inv_scale_factor + 0.5f);
|
||||
for (size_t z = 0; z < output_depth; ++z) {
|
||||
for (size_t y = 0; y < output_height; ++y) {
|
||||
for (size_t x = 0; x < output_width; ++x) {
|
||||
const float input_x = (float)x * inv_scale_factor;
|
||||
const float input_y = (float)y * inv_scale_factor;
|
||||
const float input_z = (float)z * inv_scale_factor;
|
||||
const size_t output_index = (z * output_width * output_height + y * output_width + x) *
|
||||
components;
|
||||
util_image_downscale_sample(input_pixels,
|
||||
input_width,
|
||||
input_height,
|
||||
input_depth,
|
||||
components,
|
||||
kernel_size,
|
||||
input_x,
|
||||
input_y,
|
||||
input_z,
|
||||
&output_pixels->at(output_index));
|
||||
}
|
||||
for (size_t y = 0; y < output_height; ++y) {
|
||||
for (size_t x = 0; x < output_width; ++x) {
|
||||
const float input_x = (float)x * inv_scale_factor;
|
||||
const float input_y = (float)y * inv_scale_factor;
|
||||
const size_t output_index = (y * output_width + x) * components;
|
||||
util_image_downscale_sample(input_pixels,
|
||||
input_width,
|
||||
input_height,
|
||||
components,
|
||||
kernel_size,
|
||||
input_x,
|
||||
input_y,
|
||||
&output_pixels->at(output_index));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -116,19 +100,16 @@ template<typename T>
|
||||
void util_image_resize_pixels(const vector<T> &input_pixels,
|
||||
const size_t input_width,
|
||||
const size_t input_height,
|
||||
const size_t input_depth,
|
||||
const size_t components,
|
||||
const float scale_factor,
|
||||
vector<T> *output_pixels,
|
||||
size_t *output_width,
|
||||
size_t *output_height,
|
||||
size_t *output_depth)
|
||||
size_t *output_height)
|
||||
{
|
||||
/* Early output for case when no scaling is applied. */
|
||||
if (scale_factor == 1.0f) {
|
||||
*output_width = input_width;
|
||||
*output_height = input_height;
|
||||
*output_depth = input_depth;
|
||||
*output_pixels = input_pixels;
|
||||
return;
|
||||
}
|
||||
@@ -138,22 +119,18 @@ void util_image_resize_pixels(const vector<T> &input_pixels,
|
||||
*/
|
||||
*output_width = max((size_t)((float)input_width * scale_factor), (size_t)1);
|
||||
*output_height = max((size_t)((float)input_height * scale_factor), (size_t)1);
|
||||
*output_depth = max((size_t)((float)input_depth * scale_factor), (size_t)1);
|
||||
/* Prepare pixel storage for the result. */
|
||||
const size_t num_output_pixels = ((*output_width) * (*output_height) * (*output_depth)) *
|
||||
components;
|
||||
const size_t num_output_pixels = ((*output_width) * (*output_height)) * components;
|
||||
output_pixels->resize(num_output_pixels);
|
||||
if (scale_factor < 1.0f) {
|
||||
const float inv_scale_factor = 1.0f / scale_factor;
|
||||
util_image_downscale_pixels(input_pixels,
|
||||
input_width,
|
||||
input_height,
|
||||
input_depth,
|
||||
components,
|
||||
inv_scale_factor,
|
||||
*output_width,
|
||||
*output_height,
|
||||
*output_depth,
|
||||
output_pixels);
|
||||
}
|
||||
else {
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
#pragma once
|
||||
|
||||
#ifdef WITH_OPENVDB
|
||||
# include <memory>
|
||||
# include <openvdb/openvdb.h>
|
||||
|
||||
namespace openvdb {
|
||||
|
||||
@@ -91,7 +91,6 @@ struct TextureInfo {
|
||||
/* Dimensions. */
|
||||
uint width = 0;
|
||||
uint height = 0;
|
||||
uint depth = 0;
|
||||
/* Transform for 3D textures. */
|
||||
uint use_transform_3d = false;
|
||||
Transform transform_3d = transform_zero();
|
||||
|
||||
Reference in New Issue
Block a user