2022-02-11 13:53:21 +01:00
|
|
|
/* SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
* Copyright 2011-2022 Blender Foundation */
|
2016-05-20 16:46:49 +02:00
|
|
|
|
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity,
new shadow catcher, revamped sampling settings, subsurface scattering anisotropy,
new GPU volume sampling, improved PMJ sampling pattern, and more.
Some features have also been removed or changed, breaking backwards compatibility.
Including the removal of the OpenCL backend, for which alternatives are under
development.
Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles
Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)
For the full commit history, see the cycles-x branch. This squashes together
all the changes since intermediate changes would often fail building or tests.
Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T80267, T77185, T69800
2021-09-20 17:59:20 +02:00
|
|
|
#pragma once
|
2016-05-20 16:46:49 +02:00
|
|
|
|
2020-10-02 17:40:28 +02:00
|
|
|
#ifdef WITH_NANOVDB
|
2020-12-03 15:20:50 +01:00
|
|
|
# define NANOVDB_USE_INTRINSICS
|
2020-10-02 17:40:28 +02:00
|
|
|
# include <nanovdb/NanoVDB.h>
|
|
|
|
|
# include <nanovdb/util/SampleFromVoxels.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
2016-05-20 16:46:49 +02:00
|
|
|
CCL_NAMESPACE_BEGIN
|
|
|
|
|
|
2019-08-16 16:27:15 +02:00
|
|
|
/* Make template functions private so symbols don't conflict between kernels with different
|
|
|
|
|
* instruction sets. */
|
|
|
|
|
namespace {
|
|
|
|
|
|
2017-10-06 21:47:41 +02:00
|
|
|
/* Fill `u[0..3]` with the four cubic b-spline weights for the fractional offset `t`.
 *
 * Both arguments are parenthesized in the expansion so that expressions (for example
 * `a + b`) can be passed safely. Note that `t` is still evaluated several times, so it
 * must not have side effects. Use as a statement, followed by a semicolon. */
#define SET_CUBIC_SPLINE_WEIGHTS(u, t) \
  { \
    (u)[0] = (((-1.0f / 6.0f) * (t) + 0.5f) * (t) - 0.5f) * (t) + (1.0f / 6.0f); \
    (u)[1] = ((0.5f * (t) - 1.0f) * (t)) * (t) + (2.0f / 3.0f); \
    (u)[2] = ((-0.5f * (t) + 0.5f) * (t) + 0.5f) * (t) + (1.0f / 6.0f); \
    (u)[3] = (1.0f / 6.0f) * (t) * (t) * (t); \
  } \
  (void)0
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2020-11-09 14:41:00 +01:00
|
|
|
/* Split `x` into an integer part, written to `*ix`, and the remainder, which is returned.
 *
 * The integer part is the floor of `x`. This also holds for negative values that are
 * exactly integral: -1.0f yields `*ix == -1` and a remainder of 0, rather than
 * `*ix == -2` and a remainder of 1. The remainder is non-negative and below one, up to
 * floating point rounding of the subtraction. */
ccl_device_inline float frac(float x, int *ix)
{
  /* #float_to_int presumably truncates toward zero (TODO confirm). Whenever the converted
   * value ended up above `x`, step down by one to obtain the floor. */
  const int converted = float_to_int(x);
  const int i = converted - (((float)converted > x) ? 1 : 0);
  *ix = i;
  return x - (float)i;
}
|
|
|
|
|
|
2022-03-23 22:01:32 +01:00
|
|
|
template<typename TexT, typename OutT = float4> struct TextureInterpolator {
|
2020-11-06 15:19:58 +01:00
|
|
|
|
2022-03-23 22:46:17 +01:00
|
|
|
/* Zero value of the output type: `zero_float4()` when `OutT` is float4, otherwise 0.0f. */
static ccl_always_inline OutT zero()
{
  if constexpr (!std::is_same<OutT, float4>::value) {
    return 0.0f;
  }
  else {
    return zero_float4();
  }
}
|
|
|
|
|
|
2017-10-06 21:47:41 +02:00
|
|
|
/* Texel conversion overloads: turn a single stored texel into its float / float4 form. */

/* float4 texels are returned unchanged. */
static ccl_always_inline float4 read(float4 r)
{
  return r;
}

/* uchar4 texels: every component is multiplied by 1/255. */
static ccl_always_inline float4 read(uchar4 r)
{
  const float scale = 1.0f / 255.0f;
  const float cx = r.x * scale;
  const float cy = r.y * scale;
  const float cz = r.z * scale;
  const float cw = r.w * scale;
  return make_float4(cx, cy, cz, cw);
}

/* uchar texels: multiplied by 1/255. */
static ccl_always_inline float read(uchar r)
{
  const float scale = 1.0f / 255.0f;
  return r * scale;
}

/* float texels are returned unchanged. */
static ccl_always_inline float read(float r)
{
  return r;
}

/* half4 texels: converted through #half4_to_float4_image. */
static ccl_always_inline float4 read(half4 r)
{
  const float4 converted = half4_to_float4_image(r);
  return converted;
}

/* half texels: converted through #half_to_float_image. */
static ccl_always_inline float read(half r)
{
  const float converted = half_to_float_image(r);
  return converted;
}

/* uint16_t texels: multiplied by 1/65535. */
static ccl_always_inline float read(uint16_t r)
{
  const float scale = 1.0f / 65535.0f;
  return r * scale;
}

/* ushort4 texels: every component is multiplied by 1/65535. */
static ccl_always_inline float4 read(ushort4 r)
{
  const float scale = 1.0f / 65535.0f;
  const float cx = r.x * scale;
  const float cy = r.y * scale;
  const float cz = r.z * scale;
  const float cw = r.w * scale;
  return make_float4(cx, cy, cz, cw);
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-17 16:01:39 +01:00
|
|
|
/* Read 2D Texture Data
|
|
|
|
|
* Does not check if data request is in bounds. */
|
2022-03-23 15:45:32 +01:00
|
|
|
static ccl_always_inline OutT read(const TexT *data, int x, int y, int width, int height)
|
2017-12-08 11:20:12 +01:00
|
|
|
{
|
2022-03-17 16:01:39 +01:00
|
|
|
return read(data[y * width + x]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Read 2D Texture Data Clip
|
|
|
|
|
* Returns transparent black if data request is out of bounds. */
|
2022-03-23 15:45:32 +01:00
|
|
|
static ccl_always_inline OutT read_clip(const TexT *data, int x, int y, int width, int height)
|
2022-03-17 16:01:39 +01:00
|
|
|
{
|
|
|
|
|
if (x < 0 || x >= width || y < 0 || y >= height) {
|
2022-03-23 22:46:17 +01:00
|
|
|
return zero();
|
2017-12-08 11:20:12 +01:00
|
|
|
}
|
|
|
|
|
return read(data[y * width + x]);
|
|
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-17 16:01:39 +01:00
|
|
|
/* Read 3D Texture Data
|
|
|
|
|
* Does not check if data request is in bounds. */
|
2022-03-23 15:45:32 +01:00
|
|
|
static ccl_always_inline OutT
|
|
|
|
|
read(const TexT *data, int x, int y, int z, int width, int height, int depth)
|
2022-03-17 16:01:39 +01:00
|
|
|
{
|
|
|
|
|
return read(data[x + y * width + z * width * height]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Read 3D Texture Data Clip
|
|
|
|
|
* Returns transparent black if data request is out of bounds. */
|
2022-03-23 15:45:32 +01:00
|
|
|
static ccl_always_inline OutT
|
|
|
|
|
read_clip(const TexT *data, int x, int y, int z, int width, int height, int depth)
|
2022-03-17 16:01:39 +01:00
|
|
|
{
|
|
|
|
|
if (x < 0 || x >= width || y < 0 || y >= height || z < 0 || z >= depth) {
|
2022-03-23 22:46:17 +01:00
|
|
|
return zero();
|
2022-03-17 16:01:39 +01:00
|
|
|
}
|
|
|
|
|
return read(data[x + y * width + z * width * height]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Trilinear Interpolation */
|
2022-03-23 15:45:32 +01:00
|
|
|
static ccl_always_inline OutT
|
|
|
|
|
trilinear_lookup(const TexT *data,
|
2022-03-17 16:01:39 +01:00
|
|
|
float tx,
|
|
|
|
|
float ty,
|
|
|
|
|
float tz,
|
|
|
|
|
int ix,
|
|
|
|
|
int iy,
|
|
|
|
|
int iz,
|
|
|
|
|
int nix,
|
|
|
|
|
int niy,
|
|
|
|
|
int niz,
|
|
|
|
|
int width,
|
|
|
|
|
int height,
|
|
|
|
|
int depth,
|
2022-03-23 15:45:32 +01:00
|
|
|
OutT read(const TexT *, int, int, int, int, int, int))
|
2022-03-17 16:01:39 +01:00
|
|
|
{
|
2022-03-23 15:45:32 +01:00
|
|
|
OutT r = (1.0f - tz) * (1.0f - ty) * (1.0f - tx) *
|
|
|
|
|
read(data, ix, iy, iz, width, height, depth);
|
2022-03-17 16:01:39 +01:00
|
|
|
r += (1.0f - tz) * (1.0f - ty) * tx * read(data, nix, iy, iz, width, height, depth);
|
|
|
|
|
r += (1.0f - tz) * ty * (1.0f - tx) * read(data, ix, niy, iz, width, height, depth);
|
|
|
|
|
r += (1.0f - tz) * ty * tx * read(data, nix, niy, iz, width, height, depth);
|
|
|
|
|
|
|
|
|
|
r += tz * (1.0f - ty) * (1.0f - tx) * read(data, ix, iy, niz, width, height, depth);
|
|
|
|
|
r += tz * (1.0f - ty) * tx * read(data, nix, iy, niz, width, height, depth);
|
|
|
|
|
r += tz * ty * (1.0f - tx) * read(data, ix, niy, niz, width, height, depth);
|
|
|
|
|
r += tz * ty * tx * read(data, nix, niy, niz, width, height, depth);
|
|
|
|
|
return r;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Tricubic Interpolation */
|
2022-03-23 15:45:32 +01:00
|
|
|
static ccl_always_inline OutT
|
|
|
|
|
tricubic_lookup(const TexT *data,
|
2022-03-17 16:01:39 +01:00
|
|
|
float tx,
|
|
|
|
|
float ty,
|
|
|
|
|
float tz,
|
|
|
|
|
const int xc[4],
|
|
|
|
|
const int yc[4],
|
|
|
|
|
const int zc[4],
|
|
|
|
|
int width,
|
|
|
|
|
int height,
|
|
|
|
|
int depth,
|
2022-03-23 15:45:32 +01:00
|
|
|
OutT read(const TexT *, int, int, int, int, int, int))
|
2022-03-17 16:01:39 +01:00
|
|
|
{
|
|
|
|
|
float u[4], v[4], w[4];
|
|
|
|
|
|
|
|
|
|
/* Some helper macros to keep code size reasonable.
|
|
|
|
|
* Lets the compiler inline all the matrix multiplications.
|
|
|
|
|
*/
|
|
|
|
|
#define DATA(x, y, z) (read(data, xc[x], yc[y], zc[z], width, height, depth))
|
|
|
|
|
#define COL_TERM(col, row) \
|
|
|
|
|
(v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
|
|
|
|
|
u[3] * DATA(3, col, row)))
|
|
|
|
|
#define ROW_TERM(row) \
|
|
|
|
|
(w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
|
|
|
|
|
|
|
|
|
|
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
|
|
|
|
|
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
|
|
|
|
|
SET_CUBIC_SPLINE_WEIGHTS(w, tz);
|
|
|
|
|
/* Actual interpolation. */
|
|
|
|
|
return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
|
|
|
|
|
|
|
|
|
|
#undef COL_TERM
|
|
|
|
|
#undef ROW_TERM
|
|
|
|
|
#undef DATA
|
|
|
|
|
}
|
|
|
|
|
|
2017-10-06 21:47:41 +02:00
|
|
|
/* Wrap `x` into the range [0, width) by taking the remainder of the division by `width`
 * and shifting negative remainders up by `width`. */
static ccl_always_inline int wrap_periodic(int x, int width)
{
  const int remainder = x % width;
  return (remainder < 0) ? remainder + width : remainder;
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2017-10-06 21:47:41 +02:00
|
|
|
/* Clamp `x` to the range [0, width - 1]. */
static ccl_always_inline int wrap_clamp(int x, int width)
{
  const int last = width - 1;
  return clamp(x, 0, last);
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2017-12-07 17:45:37 +01:00
|
|
|
/* ******** 2D interpolation ******** */
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
/* Closest (nearest texel) 2D lookup. `x` and `y` are scaled by the texture width and
 * height to find the texel; out-of-range texels are handled per `info.extension`. */
static ccl_always_inline OutT interp_closest(const TextureInfo &info, float x, float y)
{
  const int width = info.width;
  const int height = info.height;

  /* Integer texel coordinates of the sample; the fractional parts are not needed. */
  int ix, iy;
  frac(x * (float)width, &ix);
  frac(y * (float)height, &iy);

  if (info.extension == EXTENSION_REPEAT) {
    ix = wrap_periodic(ix, width);
    iy = wrap_periodic(iy, height);
  }
  else if (info.extension == EXTENSION_CLIP) {
    /* Nothing to read when the texel is outside the clip region. */
    const bool inside = (ix >= 0 && ix < width && iy >= 0 && iy < height);
    if (!inside) {
      return zero();
    }
  }
  else if (info.extension == EXTENSION_EXTEND) {
    ix = wrap_clamp(ix, width);
    iy = wrap_clamp(iy, height);
  }
  else {
    kernel_assert(0);
    return zero();
  }

  return read((const TexT *)info.data, ix, iy, width, height);
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
/* Bilinear 2D lookup: blends the 2x2 texels around the sample point, with out-of-range
 * texels handled per `info.extension`. */
static ccl_always_inline OutT interp_linear(const TextureInfo &info, float x, float y)
{
  const int width = info.width;
  const int height = info.height;

  /* The -0.5 offset centers the linear samples around the sample point. */
  int ix, iy;
  const float tx = frac(x * (float)width - 0.5f, &ix);
  const float ty = frac(y * (float)height - 0.5f, &iy);

  /* Coordinates of the next texel along each axis. */
  int nix, niy;

  switch (info.extension) {
    case EXTENSION_REPEAT: {
      ix = wrap_periodic(ix, width);
      iy = wrap_periodic(iy, height);
      nix = wrap_periodic(ix + 1, width);
      niy = wrap_periodic(iy + 1, height);
      break;
    }
    case EXTENSION_CLIP: {
      /* No linear samples are inside the clip region. */
      const bool x_outside = (ix < -1 || ix >= width);
      const bool y_outside = (iy < -1 || iy >= height);
      if (x_outside || y_outside) {
        return zero();
      }
      nix = ix + 1;
      niy = iy + 1;
      break;
    }
    case EXTENSION_EXTEND: {
      /* The neighbors are derived from the unclamped coordinates, so compute them
       * before clamping `ix` and `iy` themselves. */
      nix = wrap_clamp(ix + 1, width);
      niy = wrap_clamp(iy + 1, height);
      ix = wrap_clamp(ix, width);
      iy = wrap_clamp(iy, height);
      break;
    }
    default: {
      kernel_assert(0);
      return zero();
    }
  }

  const TexT *data = (const TexT *)info.data;

  /* Weights of the `ix` / `iy` texels; `tx` / `ty` weight the `nix` / `niy` texels. */
  const float sx = 1.0f - tx;
  const float sy = 1.0f - ty;

  OutT result = sy * sx * read_clip(data, ix, iy, width, height);
  result += sy * tx * read_clip(data, nix, iy, width, height);
  result += ty * sx * read_clip(data, ix, niy, width, height);
  result += ty * tx * read_clip(data, nix, niy, width, height);
  return result;
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
/* Bicubic 2D lookup: weighted sum of the 4x4 texels around the sample point using cubic
 * spline weights, with out-of-range texels handled per `info.extension`. */
static ccl_always_inline OutT interp_cubic(const TextureInfo &info, float x, float y)
{
  const int width = info.width;
  const int height = info.height;

  /* The -0.5 offset centers the cubic samples around the sample point. */
  int ix, iy;
  const float tx = frac(x * (float)width - 0.5f, &ix);
  const float ty = frac(y * (float)height - 0.5f, &iy);

  /* Texel coordinates of the footprint on each axis, in the order:
   * previous, current, next, next-next. */
  int xc[4], yc[4];

  switch (info.extension) {
    case EXTENSION_REPEAT:
      /* Wrap the current texel first, the neighbors are derived from the wrapped value. */
      xc[1] = wrap_periodic(ix, width);
      xc[0] = wrap_periodic(xc[1] - 1, width);
      xc[2] = wrap_periodic(xc[1] + 1, width);
      xc[3] = wrap_periodic(xc[1] + 2, width);

      yc[1] = wrap_periodic(iy, height);
      yc[0] = wrap_periodic(yc[1] - 1, height);
      yc[2] = wrap_periodic(yc[1] + 1, height);
      yc[3] = wrap_periodic(yc[1] + 2, height);
      break;
    case EXTENSION_CLIP:
      /* No cubic samples are inside the clip region. */
      if (ix < -2 || ix > width || iy < -2 || iy > height) {
        return zero();
      }

      xc[0] = ix - 1;
      xc[1] = ix;
      xc[2] = ix + 1;
      xc[3] = ix + 2;

      yc[0] = iy - 1;
      yc[1] = iy;
      yc[2] = iy + 1;
      yc[3] = iy + 2;
      break;
    case EXTENSION_EXTEND:
      /* Every coordinate is clamped individually, starting from the unclamped texel. */
      xc[0] = wrap_clamp(ix - 1, width);
      xc[1] = wrap_clamp(ix, width);
      xc[2] = wrap_clamp(ix + 1, width);
      xc[3] = wrap_clamp(ix + 2, width);

      yc[0] = wrap_clamp(iy - 1, height);
      yc[1] = wrap_clamp(iy, height);
      yc[2] = wrap_clamp(iy + 1, height);
      yc[3] = wrap_clamp(iy + 2, height);
      break;
    default:
      kernel_assert(0);
      return zero();
  }

  const TexT *data = (const TexT *)info.data;

  /* Spline weights along x (u) and y (v). */
  float u[4];
  float v[4];
  SET_CUBIC_SPLINE_WEIGHTS(u, tx);
  SET_CUBIC_SPLINE_WEIGHTS(v, ty);

  /* Helper macros keep the code size reasonable while still letting the compiler inline
   * all the multiplications:
   * - BICUBIC_TEXEL: one bounds-checked texel of the footprint.
   * - BICUBIC_LINE: weighted sum of the four texels along x, scaled by the y weight. */
#define BICUBIC_TEXEL(i, j) (read_clip(data, xc[i], yc[j], width, height))
#define BICUBIC_LINE(j) \
  (v[j] * (u[0] * BICUBIC_TEXEL(0, j) + u[1] * BICUBIC_TEXEL(1, j) + \
           u[2] * BICUBIC_TEXEL(2, j) + u[3] * BICUBIC_TEXEL(3, j)))

  /* Actual interpolation. */
  return BICUBIC_LINE(0) + BICUBIC_LINE(1) + BICUBIC_LINE(2) + BICUBIC_LINE(3);

#undef BICUBIC_LINE
#undef BICUBIC_TEXEL
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
/* 2D lookup entry point: dispatches on `info.interpolation`. Closest and linear have
 * dedicated paths, every other value uses cubic interpolation. */
static ccl_always_inline OutT interp(const TextureInfo &info, float x, float y)
{
  if (info.interpolation == INTERPOLATION_CLOSEST) {
    return interp_closest(info, x, y);
  }
  if (info.interpolation == INTERPOLATION_LINEAR) {
    return interp_linear(info, x, y);
  }
  return interp_cubic(info, x, y);
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2017-12-07 17:45:37 +01:00
|
|
|
/* ******** 3D interpolation ******** */
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
/* Closest (nearest texel) 3D lookup. `x`, `y` and `z` are scaled by the texture
 * dimensions to find the texel; out-of-range texels are handled per `info.extension`. */
static ccl_always_inline OutT interp_3d_closest(const TextureInfo &info,
                                                float x,
                                                float y,
                                                float z)
{
  const int width = info.width;
  const int height = info.height;
  const int depth = info.depth;

  /* Integer texel coordinates of the sample; the fractional parts are not needed. */
  int ix, iy, iz;
  frac(x * (float)width, &ix);
  frac(y * (float)height, &iy);
  frac(z * (float)depth, &iz);

  switch (info.extension) {
    case EXTENSION_REPEAT: {
      ix = wrap_periodic(ix, width);
      iy = wrap_periodic(iy, height);
      iz = wrap_periodic(iz, depth);
      break;
    }
    case EXTENSION_CLIP: {
      /* Nothing to read when the texel is outside the clip region. */
      const bool inside = (ix >= 0 && ix < width && iy >= 0 && iy < height && iz >= 0 &&
                           iz < depth);
      if (!inside) {
        return zero();
      }
      break;
    }
    case EXTENSION_EXTEND: {
      ix = wrap_clamp(ix, width);
      iy = wrap_clamp(iy, height);
      iz = wrap_clamp(iz, depth);
      break;
    }
    default: {
      kernel_assert(0);
      return zero();
    }
  }

  return read((const TexT *)info.data, ix, iy, iz, width, height, depth);
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
/* Trilinear 3D lookup: blends the 2x2x2 texels around the sample point, with
 * out-of-range texels handled per `info.extension`. */
static ccl_always_inline OutT interp_3d_linear(const TextureInfo &info,
                                               float x,
                                               float y,
                                               float z)
{
  const int width = info.width;
  const int height = info.height;
  const int depth = info.depth;

  /* The -0.5 offset centers the linear samples around the sample point. */
  int ix, iy, iz;
  const float tx = frac(x * (float)width - 0.5f, &ix);
  const float ty = frac(y * (float)height - 0.5f, &iy);
  const float tz = frac(z * (float)depth - 0.5f, &iz);

  /* Coordinates of the next texel along each axis. */
  int nix, niy, niz;

  /* Set when a clip lookup spans the clip border, in which case #read_clip is used to
   * ensure proper interpolation across the border. */
  bool spans_clip_border = false;

  switch (info.extension) {
    case EXTENSION_REPEAT: {
      ix = wrap_periodic(ix, width);
      iy = wrap_periodic(iy, height);
      iz = wrap_periodic(iz, depth);

      nix = wrap_periodic(ix + 1, width);
      niy = wrap_periodic(iy + 1, height);
      niz = wrap_periodic(iz + 1, depth);
      break;
    }
    case EXTENSION_CLIP: {
      /* No linear samples are inside the clip region. */
      if (ix < -1 || ix >= width || iy < -1 || iy >= height || iz < -1 || iz >= depth) {
        return zero();
      }

      nix = ix + 1;
      niy = iy + 1;
      niz = iz + 1;

      /* When all linear samples are inside the clip region, the unchecked read is used. */
      const bool all_inside = (ix >= 0 && nix < width && iy >= 0 && niy < height &&
                               iz >= 0 && niz < depth);
      spans_clip_border = !all_inside;
      break;
    }
    case EXTENSION_EXTEND: {
      /* The neighbors are derived from the unclamped coordinates, so compute them
       * before clamping `ix`, `iy` and `iz` themselves. */
      nix = wrap_clamp(ix + 1, width);
      niy = wrap_clamp(iy + 1, height);
      niz = wrap_clamp(iz + 1, depth);

      ix = wrap_clamp(ix, width);
      iy = wrap_clamp(iy, height);
      iz = wrap_clamp(iz, depth);
      break;
    }
    default: {
      kernel_assert(0);
      return zero();
    }
  }

  const TexT *data = (const TexT *)info.data;

  if (spans_clip_border) {
    return trilinear_lookup(
        data, tx, ty, tz, ix, iy, iz, nix, niy, niz, width, height, depth, read_clip);
  }
  return trilinear_lookup(
      data, tx, ty, tz, ix, iy, iz, nix, niy, niz, width, height, depth, read);
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-17 16:01:39 +01:00
|
|
|
/* Tricubic b-spline interpolation.
|
|
|
|
|
*
|
|
|
|
|
* TODO(sergey): For some unspeakable reason both GCC-6 and Clang-3.9 are
|
2017-10-06 21:47:41 +02:00
|
|
|
* causing stack overflow issue in this function unless it is inlined.
|
|
|
|
|
*
|
|
|
|
|
* Only happens for AVX2 kernel and global __KERNEL_SSE__ vectorization
|
|
|
|
|
* enabled.
|
|
|
|
|
*/
|
Windows: Add support for building with clang.
This commit contains the minimum to make clang build/work with blender, asan and ninja build support is forthcoming
Things to note:
1) Builds and runs, and is able to pass all tests (except for the freestyle_stroke_material.blend test which was broken at that time for all platforms by the looks of it)
2) It's slightly faster than msvc when using cycles. (time in seconds, on an i7-3370)
victor_cpu
msvc:3099.51
clang:2796.43
pavillon_barcelona_cpu
msvc:1872.05
clang:1827.72
koro_cpu
msvc:1097.58
clang:1006.51
fishy_cat_cpu
msvc:815.37
clang:722.2
classroom_cpu
msvc:1705.39
clang:1575.43
bmw27_cpu
msvc:552.38
clang:561.53
barbershop_interior_cpu
msvc:2134.93
clang:1922.33
3) clang on windows uses a drop in replacement for the Microsoft cl.exe (takes some of the Microsoft parameters, but not all, and takes some of the clang parameters but not all) and uses ms headers + libraries + linker, so you still need visual studio installed and will use our existing vc14 svn libs.
4) X64 only currently, X86 builds but crashes on startup.
5) Tested with llvm/clang 6.0.0
6) Requires visual studio integration, available at https://github.com/LazyDodo/llvm-vs2017-integration
7) The Microsoft compiler spawns a few copies of cl in parallel to get faster build times, clang doesn't, so the build time is 3-4x slower than with msvc.
8) No openmp support yet. Have not looked at this much, the binary distribution of clang doesn't seem to include it on windows.
9) No ASAN support yet, some of the sanitizers can be made to work, but it was decided to leave support out of this commit.
Reviewers: campbellbarton
Differential Revision: https://developer.blender.org/D3304
2018-05-28 14:34:47 -06:00
|
|
|
#if defined(__GNUC__) || defined(__clang__)
|
2017-10-06 21:47:41 +02:00
|
|
|
static ccl_always_inline
|
|
|
|
|
#else
|
|
|
|
|
static ccl_never_inline
|
|
|
|
|
#endif
|
2022-03-23 15:45:32 +01:00
|
|
|
OutT
|
2020-11-06 15:19:58 +01:00
|
|
|
interp_3d_cubic(const TextureInfo &info, float x, float y, float z)
|
2017-10-06 21:47:41 +02:00
|
|
|
{
|
|
|
|
|
int width = info.width;
|
|
|
|
|
int height = info.height;
|
|
|
|
|
int depth = info.depth;
|
|
|
|
|
int ix, iy, iz;
|
2022-03-17 16:01:39 +01:00
|
|
|
|
|
|
|
|
/* A -0.5 offset is used to center the cubic samples around the sample point. */
|
2017-10-06 21:47:41 +02:00
|
|
|
const float tx = frac(x * (float)width - 0.5f, &ix);
|
|
|
|
|
const float ty = frac(y * (float)height - 0.5f, &iy);
|
|
|
|
|
const float tz = frac(z * (float)depth - 0.5f, &iz);
|
2022-03-17 16:01:39 +01:00
|
|
|
|
|
|
|
|
int pix, piy, piz;
|
|
|
|
|
int nix, niy, niz;
|
|
|
|
|
int nnix, nniy, nniz;
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2017-10-06 21:47:41 +02:00
|
|
|
switch (info.extension) {
|
|
|
|
|
case EXTENSION_REPEAT:
|
|
|
|
|
ix = wrap_periodic(ix, width);
|
|
|
|
|
pix = wrap_periodic(ix - 1, width);
|
|
|
|
|
nix = wrap_periodic(ix + 1, width);
|
|
|
|
|
nnix = wrap_periodic(ix + 2, width);
|
2022-03-17 16:01:39 +01:00
|
|
|
|
|
|
|
|
iy = wrap_periodic(iy, height);
|
|
|
|
|
niy = wrap_periodic(iy + 1, height);
|
|
|
|
|
piy = wrap_periodic(iy - 1, height);
|
2017-10-06 21:47:41 +02:00
|
|
|
nniy = wrap_periodic(iy + 2, height);
|
2022-03-17 16:01:39 +01:00
|
|
|
|
|
|
|
|
iz = wrap_periodic(iz, depth);
|
|
|
|
|
piz = wrap_periodic(iz - 1, depth);
|
|
|
|
|
niz = wrap_periodic(iz + 1, depth);
|
2017-10-06 21:47:41 +02:00
|
|
|
nniz = wrap_periodic(iz + 2, depth);
|
|
|
|
|
break;
|
2022-03-17 16:01:39 +01:00
|
|
|
case EXTENSION_CLIP: {
|
|
|
|
|
/* No cubic samples are inside the clip region. */
|
|
|
|
|
if (ix < -2 || ix > width || iy < -2 || iy > height || iz < -2 || iz > depth) {
|
2022-03-23 22:46:17 +01:00
|
|
|
return zero();
|
2017-10-06 21:47:41 +02:00
|
|
|
}
|
2022-03-17 16:01:39 +01:00
|
|
|
|
|
|
|
|
pix = ix - 1;
|
|
|
|
|
nnix = ix + 2;
|
|
|
|
|
nix = ix + 1;
|
|
|
|
|
|
|
|
|
|
piy = iy - 1;
|
|
|
|
|
niy = iy + 1;
|
|
|
|
|
nniy = iy + 2;
|
|
|
|
|
|
|
|
|
|
piz = iz - 1;
|
|
|
|
|
niz = iz + 1;
|
|
|
|
|
nniz = iz + 2;
|
|
|
|
|
|
|
|
|
|
/* All cubic samples are inside the clip region. */
|
|
|
|
|
if (pix >= 0 && nnix < width && piy >= 0 && nniy < height && piz >= 0 && nniz < depth) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* The Cubic samples span the clip border.
|
|
|
|
|
* read_clip is used to ensure proper interpolation across the clip border. */
|
|
|
|
|
const int xc[4] = {pix, ix, nix, nnix};
|
|
|
|
|
const int yc[4] = {piy, iy, niy, nniy};
|
|
|
|
|
const int zc[4] = {piz, iz, niz, nniz};
|
|
|
|
|
return tricubic_lookup(
|
2022-03-23 15:45:32 +01:00
|
|
|
(const TexT *)info.data, tx, ty, tz, xc, yc, zc, width, height, depth, read_clip);
|
2022-03-17 16:01:39 +01:00
|
|
|
}
|
2017-10-06 21:47:41 +02:00
|
|
|
case EXTENSION_EXTEND:
|
|
|
|
|
pix = wrap_clamp(ix - 1, width);
|
|
|
|
|
nix = wrap_clamp(ix + 1, width);
|
|
|
|
|
nnix = wrap_clamp(ix + 2, width);
|
|
|
|
|
ix = wrap_clamp(ix, width);
|
2022-03-17 16:01:39 +01:00
|
|
|
|
|
|
|
|
piy = wrap_clamp(iy - 1, height);
|
|
|
|
|
niy = wrap_clamp(iy + 1, height);
|
|
|
|
|
nniy = wrap_clamp(iy + 2, height);
|
2017-10-06 21:47:41 +02:00
|
|
|
iy = wrap_clamp(iy, height);
|
2022-03-17 16:01:39 +01:00
|
|
|
|
|
|
|
|
piz = wrap_clamp(iz - 1, depth);
|
|
|
|
|
niz = wrap_clamp(iz + 1, depth);
|
|
|
|
|
nniz = wrap_clamp(iz + 2, depth);
|
2017-10-06 21:47:41 +02:00
|
|
|
iz = wrap_clamp(iz, depth);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
kernel_assert(0);
|
2022-03-23 22:46:17 +01:00
|
|
|
return zero();
|
2017-10-06 21:47:41 +02:00
|
|
|
}
|
|
|
|
|
const int xc[4] = {pix, ix, nix, nnix};
|
2022-03-17 16:01:39 +01:00
|
|
|
const int yc[4] = {piy, iy, niy, nniy};
|
|
|
|
|
const int zc[4] = {piz, iz, niz, nniz};
|
2022-03-23 15:45:32 +01:00
|
|
|
const TexT *data = (const TexT *)info.data;
|
2022-03-17 16:01:39 +01:00
|
|
|
return tricubic_lookup(data, tx, ty, tz, xc, yc, zc, width, height, depth, read);
|
2017-10-06 21:47:41 +02:00
|
|
|
}
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
/* 3D lookup entry point. `interp` selects the interpolation to use; passing
 * INTERPOLATION_NONE falls back to `info.interpolation`. Closest and linear have
 * dedicated paths, every other value uses cubic interpolation. */
static ccl_always_inline OutT
interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
{
  const auto mode = (interp == INTERPOLATION_NONE) ? info.interpolation : interp;

  if (mode == INTERPOLATION_CLOSEST) {
    return interp_3d_closest(info, x, y, z);
  }
  if (mode == INTERPOLATION_LINEAR) {
    return interp_3d_linear(info, x, y, z);
  }
  return interp_3d_cubic(info, x, y, z);
}
|
|
|
|
|
};
|
|
|
|
|
|
2020-10-02 17:40:28 +02:00
|
|
|
#ifdef WITH_NANOVDB
|
2022-03-23 15:45:32 +01:00
|
|
|
template<typename TexT, typename OutT = float4> struct NanoVDBInterpolator {
|
2020-11-06 15:19:58 +01:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
typedef typename nanovdb::NanoGrid<TexT>::AccessorType AccessorType;
|
2020-11-06 15:19:58 +01:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
/* Grid value conversion overloads. */

/* float values are returned unchanged. */
static ccl_always_inline float read(float r)
{
  return r;
}

/* Vec3f values: the three components become x, y, z of a float4 whose w is 1.0f. */
static ccl_always_inline float4 read(nanovdb::Vec3f r)
{
  const float vx = r[0];
  const float vy = r[1];
  const float vz = r[2];
  return make_float4(vx, vy, vz, 1.0f);
}
|
|
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
/* Closest lookup at (x, y, z) through NanoVDB's `SampleFromVoxels`, instantiated with
 * the template arguments `<AccessorType, 0, false>` (presumably sampling order 0 with
 * caching disabled; confirm against the NanoVDB version in use). */
static ccl_always_inline OutT interp_3d_closest(const AccessorType &acc,
                                                float x,
                                                float y,
                                                float z)
{
  using Sampler = nanovdb::SampleFromVoxels<AccessorType, 0, false>;

  Sampler sampler(acc);
  return read(sampler(nanovdb::Vec3f(x, y, z)));
}
|
|
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
/* Linear lookup through NanoVDB's `SampleFromVoxels`, instantiated with the template
 * arguments `<AccessorType, 1, false>` (presumably sampling order 1 with caching
 * disabled; confirm against the NanoVDB version in use). The position is shifted by
 * -0.5 on every axis before sampling. */
static ccl_always_inline OutT interp_3d_linear(const AccessorType &acc,
                                               float x,
                                               float y,
                                               float z)
{
  using Sampler = nanovdb::SampleFromVoxels<AccessorType, 1, false>;

  Sampler sampler(acc);
  return read(sampler(nanovdb::Vec3f(x - 0.5f, y - 0.5f, z - 0.5f)));
}
|
|
|
|
|
|
2022-03-17 16:01:39 +01:00
|
|
|
/* Tricubic b-spline interpolation. */
|
2020-11-06 15:19:58 +01:00
|
|
|
# if defined(__GNUC__) || defined(__clang__)
|
|
|
|
|
static ccl_always_inline
|
|
|
|
|
# else
|
|
|
|
|
static ccl_never_inline
|
|
|
|
|
# endif
|
2022-03-23 15:45:32 +01:00
|
|
|
OutT
|
2020-11-10 18:28:14 +01:00
|
|
|
interp_3d_cubic(const AccessorType &acc, float x, float y, float z)
|
2020-11-06 15:19:58 +01:00
|
|
|
{
|
|
|
|
|
int ix, iy, iz;
|
|
|
|
|
int nix, niy, niz;
|
|
|
|
|
int pix, piy, piz;
|
|
|
|
|
int nnix, nniy, nniz;
|
2022-03-17 16:01:39 +01:00
|
|
|
|
|
|
|
|
/* A -0.5 offset is used to center the cubic samples around the sample point. */
|
2020-11-06 15:19:58 +01:00
|
|
|
const float tx = frac(x - 0.5f, &ix);
|
|
|
|
|
const float ty = frac(y - 0.5f, &iy);
|
|
|
|
|
const float tz = frac(z - 0.5f, &iz);
|
2022-03-17 16:01:39 +01:00
|
|
|
|
2020-11-06 15:19:58 +01:00
|
|
|
pix = ix - 1;
|
|
|
|
|
piy = iy - 1;
|
|
|
|
|
piz = iz - 1;
|
|
|
|
|
nix = ix + 1;
|
|
|
|
|
niy = iy + 1;
|
|
|
|
|
niz = iz + 1;
|
|
|
|
|
nnix = ix + 2;
|
|
|
|
|
nniy = iy + 2;
|
|
|
|
|
nniz = iz + 2;
|
|
|
|
|
|
|
|
|
|
const int xc[4] = {pix, ix, nix, nnix};
|
|
|
|
|
const int yc[4] = {piy, iy, niy, nniy};
|
|
|
|
|
const int zc[4] = {piz, iz, niz, nniz};
|
|
|
|
|
float u[4], v[4], w[4];
|
|
|
|
|
|
2022-03-17 16:01:39 +01:00
|
|
|
/* Some helper macros to keep code size reasonable.
|
|
|
|
|
* Lets the compiler inline all the matrix multiplications.
|
2020-11-06 15:19:58 +01:00
|
|
|
*/
|
|
|
|
|
# define DATA(x, y, z) (read(acc.getValue(nanovdb::Coord(xc[x], yc[y], zc[z]))))
|
|
|
|
|
# define COL_TERM(col, row) \
|
|
|
|
|
(v[col] * (u[0] * DATA(0, col, row) + u[1] * DATA(1, col, row) + u[2] * DATA(2, col, row) + \
|
|
|
|
|
u[3] * DATA(3, col, row)))
|
|
|
|
|
# define ROW_TERM(row) \
|
|
|
|
|
(w[row] * (COL_TERM(0, row) + COL_TERM(1, row) + COL_TERM(2, row) + COL_TERM(3, row)))
|
|
|
|
|
|
|
|
|
|
SET_CUBIC_SPLINE_WEIGHTS(u, tx);
|
|
|
|
|
SET_CUBIC_SPLINE_WEIGHTS(v, ty);
|
|
|
|
|
SET_CUBIC_SPLINE_WEIGHTS(w, tz);
|
|
|
|
|
|
|
|
|
|
/* Actual interpolation. */
|
|
|
|
|
return ROW_TERM(0) + ROW_TERM(1) + ROW_TERM(2) + ROW_TERM(3);
|
|
|
|
|
|
|
|
|
|
# undef COL_TERM
|
|
|
|
|
# undef ROW_TERM
|
|
|
|
|
# undef DATA
|
|
|
|
|
}
|
|
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
static ccl_always_inline OutT
|
2020-10-02 17:40:28 +02:00
|
|
|
interp_3d(const TextureInfo &info, float x, float y, float z, InterpolationType interp)
|
|
|
|
|
{
|
2020-11-06 15:19:58 +01:00
|
|
|
using namespace nanovdb;
|
|
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
NanoGrid<TexT> *const grid = (NanoGrid<TexT> *)info.data;
|
2020-11-10 18:28:14 +01:00
|
|
|
AccessorType acc = grid->getAccessor();
|
2020-10-02 17:40:28 +02:00
|
|
|
|
|
|
|
|
switch ((interp == INTERPOLATION_NONE) ? info.interpolation : interp) {
|
|
|
|
|
case INTERPOLATION_CLOSEST:
|
2020-11-10 18:28:14 +01:00
|
|
|
return interp_3d_closest(acc, x, y, z);
|
2020-11-04 15:09:06 +01:00
|
|
|
case INTERPOLATION_LINEAR:
|
2020-11-10 18:28:14 +01:00
|
|
|
return interp_3d_linear(acc, x, y, z);
|
2020-11-04 15:09:06 +01:00
|
|
|
default:
|
2020-11-10 18:28:14 +01:00
|
|
|
return interp_3d_cubic(acc, x, y, z);
|
2020-10-02 17:40:28 +02:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
#endif
|
|
|
|
|
|
2020-11-06 15:19:58 +01:00
|
|
|
#undef SET_CUBIC_SPLINE_WEIGHTS
|
|
|
|
|
|
2021-10-17 16:10:10 +02:00
|
|
|
ccl_device float4 kernel_tex_image_interp(KernelGlobals kg, int id, float x, float y)
|
2016-05-20 16:46:49 +02:00
|
|
|
{
|
2022-06-17 17:16:37 +02:00
|
|
|
const TextureInfo &info = kernel_data_fetch(texture_info, id);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
if (UNLIKELY(!info.data)) {
|
|
|
|
|
return zero_float4();
|
|
|
|
|
}
|
|
|
|
|
|
2020-02-26 17:31:33 +01:00
|
|
|
switch (info.data_type) {
|
2022-03-23 15:45:32 +01:00
|
|
|
case IMAGE_DATA_TYPE_HALF: {
|
|
|
|
|
const float f = TextureInterpolator<half, float>::interp(info, x, y);
|
|
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
|
|
|
|
case IMAGE_DATA_TYPE_BYTE: {
|
|
|
|
|
const float f = TextureInterpolator<uchar, float>::interp(info, x, y);
|
|
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
|
|
|
|
case IMAGE_DATA_TYPE_USHORT: {
|
|
|
|
|
const float f = TextureInterpolator<uint16_t, float>::interp(info, x, y);
|
|
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
|
|
|
|
case IMAGE_DATA_TYPE_FLOAT: {
|
|
|
|
|
const float f = TextureInterpolator<float, float>::interp(info, x, y);
|
|
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
Unlimited number of textures for Cycles
This patch allows for an unlimited number of textures in Cycles where the hardware allows. It replaces a number static arrays with dynamic arrays and changes the way the flat_slot indices are calculated. Eventually, I'd like to get to a point where there are only flat slots left and textures off all kinds are stored in a single array.
Note that the arrays in DeviceScene are changed from containing device_vector<T> objects to device_vector<T>* pointers. Ideally, I'd like to store objects, but dynamic resizing of a std:vector in pre-C++11 calls the copy constructor, which for a good reason is not implemented for device_vector. Once we require C++11 for Cycles builds, we can implement a move constructor for device_vector and store objects again.
The limits for CUDA Fermi hardware still apply.
Reviewers: tod_baudais, InsigMathK, dingto, #cycles
Reviewed By: dingto, #cycles
Subscribers: dingto, smellslikedonkey
Differential Revision: https://developer.blender.org/D2650
2017-04-27 09:34:51 +02:00
|
|
|
case IMAGE_DATA_TYPE_HALF4:
|
2017-10-06 21:47:41 +02:00
|
|
|
return TextureInterpolator<half4>::interp(info, x, y);
|
Unlimited number of textures for Cycles
This patch allows for an unlimited number of textures in Cycles where the hardware allows. It replaces a number static arrays with dynamic arrays and changes the way the flat_slot indices are calculated. Eventually, I'd like to get to a point where there are only flat slots left and textures off all kinds are stored in a single array.
Note that the arrays in DeviceScene are changed from containing device_vector<T> objects to device_vector<T>* pointers. Ideally, I'd like to store objects, but dynamic resizing of a std:vector in pre-C++11 calls the copy constructor, which for a good reason is not implemented for device_vector. Once we require C++11 for Cycles builds, we can implement a move constructor for device_vector and store objects again.
The limits for CUDA Fermi hardware still apply.
Reviewers: tod_baudais, InsigMathK, dingto, #cycles
Reviewed By: dingto, #cycles
Subscribers: dingto, smellslikedonkey
Differential Revision: https://developer.blender.org/D2650
2017-04-27 09:34:51 +02:00
|
|
|
case IMAGE_DATA_TYPE_BYTE4:
|
2017-10-06 21:47:41 +02:00
|
|
|
return TextureInterpolator<uchar4>::interp(info, x, y);
|
2018-07-05 12:37:52 +02:00
|
|
|
case IMAGE_DATA_TYPE_USHORT4:
|
|
|
|
|
return TextureInterpolator<ushort4>::interp(info, x, y);
|
Unlimited number of textures for Cycles
This patch allows for an unlimited number of textures in Cycles where the hardware allows. It replaces a number static arrays with dynamic arrays and changes the way the flat_slot indices are calculated. Eventually, I'd like to get to a point where there are only flat slots left and textures off all kinds are stored in a single array.
Note that the arrays in DeviceScene are changed from containing device_vector<T> objects to device_vector<T>* pointers. Ideally, I'd like to store objects, but dynamic resizing of a std:vector in pre-C++11 calls the copy constructor, which for a good reason is not implemented for device_vector. Once we require C++11 for Cycles builds, we can implement a move constructor for device_vector and store objects again.
The limits for CUDA Fermi hardware still apply.
Reviewers: tod_baudais, InsigMathK, dingto, #cycles
Reviewed By: dingto, #cycles
Subscribers: dingto, smellslikedonkey
Differential Revision: https://developer.blender.org/D2650
2017-04-27 09:34:51 +02:00
|
|
|
case IMAGE_DATA_TYPE_FLOAT4:
|
2017-10-06 21:47:41 +02:00
|
|
|
return TextureInterpolator<float4>::interp(info, x, y);
|
2018-07-05 12:37:52 +02:00
|
|
|
default:
|
|
|
|
|
assert(0);
|
|
|
|
|
return make_float4(
|
|
|
|
|
TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
|
Unlimited number of textures for Cycles
This patch allows for an unlimited number of textures in Cycles where the hardware allows. It replaces a number static arrays with dynamic arrays and changes the way the flat_slot indices are calculated. Eventually, I'd like to get to a point where there are only flat slots left and textures off all kinds are stored in a single array.
Note that the arrays in DeviceScene are changed from containing device_vector<T> objects to device_vector<T>* pointers. Ideally, I'd like to store objects, but dynamic resizing of a std:vector in pre-C++11 calls the copy constructor, which for a good reason is not implemented for device_vector. Once we require C++11 for Cycles builds, we can implement a move constructor for device_vector and store objects again.
The limits for CUDA Fermi hardware still apply.
Reviewers: tod_baudais, InsigMathK, dingto, #cycles
Reviewed By: dingto, #cycles
Subscribers: dingto, smellslikedonkey
Differential Revision: https://developer.blender.org/D2650
2017-04-27 09:34:51 +02:00
|
|
|
}
|
2016-05-20 16:46:49 +02:00
|
|
|
}
|
|
|
|
|
|
2021-10-17 16:10:10 +02:00
|
|
|
ccl_device float4 kernel_tex_image_interp_3d(KernelGlobals kg,
|
2020-03-17 16:48:00 +01:00
|
|
|
int id,
|
|
|
|
|
float3 P,
|
|
|
|
|
InterpolationType interp)
|
2016-05-20 16:46:49 +02:00
|
|
|
{
|
2022-06-17 17:16:37 +02:00
|
|
|
const TextureInfo &info = kernel_data_fetch(texture_info, id);
|
2019-04-17 06:17:24 +02:00
|
|
|
|
2022-03-23 15:45:32 +01:00
|
|
|
if (UNLIKELY(!info.data)) {
|
|
|
|
|
return zero_float4();
|
|
|
|
|
}
|
|
|
|
|
|
2020-03-17 16:48:00 +01:00
|
|
|
if (info.use_transform_3d) {
|
|
|
|
|
P = transform_point(&info.transform_3d, P);
|
|
|
|
|
}
|
2020-02-26 17:31:33 +01:00
|
|
|
switch (info.data_type) {
|
2022-03-23 15:45:32 +01:00
|
|
|
case IMAGE_DATA_TYPE_HALF: {
|
|
|
|
|
const float f = TextureInterpolator<half, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
|
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
|
|
|
|
case IMAGE_DATA_TYPE_BYTE: {
|
|
|
|
|
const float f = TextureInterpolator<uchar, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
|
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
|
|
|
|
case IMAGE_DATA_TYPE_USHORT: {
|
|
|
|
|
const float f = TextureInterpolator<uint16_t, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
|
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
|
|
|
|
case IMAGE_DATA_TYPE_FLOAT: {
|
|
|
|
|
const float f = TextureInterpolator<float, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
|
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
Unlimited number of textures for Cycles
This patch allows for an unlimited number of textures in Cycles where the hardware allows. It replaces a number static arrays with dynamic arrays and changes the way the flat_slot indices are calculated. Eventually, I'd like to get to a point where there are only flat slots left and textures off all kinds are stored in a single array.
Note that the arrays in DeviceScene are changed from containing device_vector<T> objects to device_vector<T>* pointers. Ideally, I'd like to store objects, but dynamic resizing of a std:vector in pre-C++11 calls the copy constructor, which for a good reason is not implemented for device_vector. Once we require C++11 for Cycles builds, we can implement a move constructor for device_vector and store objects again.
The limits for CUDA Fermi hardware still apply.
Reviewers: tod_baudais, InsigMathK, dingto, #cycles
Reviewed By: dingto, #cycles
Subscribers: dingto, smellslikedonkey
Differential Revision: https://developer.blender.org/D2650
2017-04-27 09:34:51 +02:00
|
|
|
case IMAGE_DATA_TYPE_HALF4:
|
2020-03-17 16:48:00 +01:00
|
|
|
return TextureInterpolator<half4>::interp_3d(info, P.x, P.y, P.z, interp);
|
Unlimited number of textures for Cycles
This patch allows for an unlimited number of textures in Cycles where the hardware allows. It replaces a number static arrays with dynamic arrays and changes the way the flat_slot indices are calculated. Eventually, I'd like to get to a point where there are only flat slots left and textures off all kinds are stored in a single array.
Note that the arrays in DeviceScene are changed from containing device_vector<T> objects to device_vector<T>* pointers. Ideally, I'd like to store objects, but dynamic resizing of a std:vector in pre-C++11 calls the copy constructor, which for a good reason is not implemented for device_vector. Once we require C++11 for Cycles builds, we can implement a move constructor for device_vector and store objects again.
The limits for CUDA Fermi hardware still apply.
Reviewers: tod_baudais, InsigMathK, dingto, #cycles
Reviewed By: dingto, #cycles
Subscribers: dingto, smellslikedonkey
Differential Revision: https://developer.blender.org/D2650
2017-04-27 09:34:51 +02:00
|
|
|
case IMAGE_DATA_TYPE_BYTE4:
|
2020-03-17 16:48:00 +01:00
|
|
|
return TextureInterpolator<uchar4>::interp_3d(info, P.x, P.y, P.z, interp);
|
2018-07-05 12:37:52 +02:00
|
|
|
case IMAGE_DATA_TYPE_USHORT4:
|
2020-03-17 16:48:00 +01:00
|
|
|
return TextureInterpolator<ushort4>::interp_3d(info, P.x, P.y, P.z, interp);
|
Unlimited number of textures for Cycles
This patch allows for an unlimited number of textures in Cycles where the hardware allows. It replaces a number static arrays with dynamic arrays and changes the way the flat_slot indices are calculated. Eventually, I'd like to get to a point where there are only flat slots left and textures off all kinds are stored in a single array.
Note that the arrays in DeviceScene are changed from containing device_vector<T> objects to device_vector<T>* pointers. Ideally, I'd like to store objects, but dynamic resizing of a std:vector in pre-C++11 calls the copy constructor, which for a good reason is not implemented for device_vector. Once we require C++11 for Cycles builds, we can implement a move constructor for device_vector and store objects again.
The limits for CUDA Fermi hardware still apply.
Reviewers: tod_baudais, InsigMathK, dingto, #cycles
Reviewed By: dingto, #cycles
Subscribers: dingto, smellslikedonkey
Differential Revision: https://developer.blender.org/D2650
2017-04-27 09:34:51 +02:00
|
|
|
case IMAGE_DATA_TYPE_FLOAT4:
|
2020-03-17 16:48:00 +01:00
|
|
|
return TextureInterpolator<float4>::interp_3d(info, P.x, P.y, P.z, interp);
|
2020-10-02 17:40:28 +02:00
|
|
|
#ifdef WITH_NANOVDB
|
2022-03-23 15:45:32 +01:00
|
|
|
case IMAGE_DATA_TYPE_NANOVDB_FLOAT: {
|
|
|
|
|
const float f = NanoVDBInterpolator<float, float>::interp_3d(info, P.x, P.y, P.z, interp);
|
|
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
2020-10-02 17:40:28 +02:00
|
|
|
case IMAGE_DATA_TYPE_NANOVDB_FLOAT3:
|
|
|
|
|
return NanoVDBInterpolator<nanovdb::Vec3f>::interp_3d(info, P.x, P.y, P.z, interp);
|
2022-05-20 18:01:26 +02:00
|
|
|
case IMAGE_DATA_TYPE_NANOVDB_FPN: {
|
2022-05-24 15:53:02 +02:00
|
|
|
const float f = NanoVDBInterpolator<nanovdb::FpN, float>::interp_3d(
|
|
|
|
|
info, P.x, P.y, P.z, interp);
|
2022-05-20 18:01:26 +02:00
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
|
|
|
|
case IMAGE_DATA_TYPE_NANOVDB_FP16: {
|
2022-05-24 15:53:02 +02:00
|
|
|
const float f = NanoVDBInterpolator<nanovdb::Fp16, float>::interp_3d(
|
|
|
|
|
info, P.x, P.y, P.z, interp);
|
2022-05-20 18:01:26 +02:00
|
|
|
return make_float4(f, f, f, 1.0f);
|
|
|
|
|
}
|
2020-10-02 17:40:28 +02:00
|
|
|
#endif
|
2018-07-05 12:37:52 +02:00
|
|
|
default:
|
|
|
|
|
assert(0);
|
|
|
|
|
return make_float4(
|
|
|
|
|
TEX_IMAGE_MISSING_R, TEX_IMAGE_MISSING_G, TEX_IMAGE_MISSING_B, TEX_IMAGE_MISSING_A);
|
Unlimited number of textures for Cycles
This patch allows for an unlimited number of textures in Cycles where the hardware allows. It replaces a number static arrays with dynamic arrays and changes the way the flat_slot indices are calculated. Eventually, I'd like to get to a point where there are only flat slots left and textures off all kinds are stored in a single array.
Note that the arrays in DeviceScene are changed from containing device_vector<T> objects to device_vector<T>* pointers. Ideally, I'd like to store objects, but dynamic resizing of a std:vector in pre-C++11 calls the copy constructor, which for a good reason is not implemented for device_vector. Once we require C++11 for Cycles builds, we can implement a move constructor for device_vector and store objects again.
The limits for CUDA Fermi hardware still apply.
Reviewers: tod_baudais, InsigMathK, dingto, #cycles
Reviewed By: dingto, #cycles
Subscribers: dingto, smellslikedonkey
Differential Revision: https://developer.blender.org/D2650
2017-04-27 09:34:51 +02:00
|
|
|
}
|
2016-05-20 16:46:49 +02:00
|
|
|
}
|
|
|
|
|
|
2019-08-16 16:27:15 +02:00
|
|
|
} /* Namespace. */
|
|
|
|
|
|
2016-05-20 16:46:49 +02:00
|
|
|
CCL_NAMESPACE_END
|